blob: 0d27e87e58fba4db51cc4e450f5421aaa72b26e9 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000083/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000084 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000085 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
Owen Taylor3473f882001-02-23 17:55:21 +000099/*
Owen Taylor3473f882001-02-23 17:55:21 +0000100 * List of XML prefixed PI allowed by W3C specs
101 */
102
Daniel Veillardb44025c2001-10-11 22:55:55 +0000103static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000104 "xml-stylesheet",
105 NULL
106};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
Daniel Veillardac4118d2008-01-11 05:27:32 +0000960 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillardac4118d2008-01-11 05:27:32 +0000974 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
975 return;
976
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000977 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
978 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000979 return;
980
981mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000982 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000983 return;
984}
985
Daniel Veillard4432df22003-09-28 18:58:27 +0000986/**
Daniel Veillardac4118d2008-01-11 05:27:32 +0000987 * xmlCleanSpecialAttrCallback:
988 *
989 * Removes CDATA attributes from the special attribute table
990 */
991static void
992xmlCleanSpecialAttrCallback(void *payload, void *data,
993 const xmlChar *fullname, const xmlChar *fullattr,
994 const xmlChar *unused ATTRIBUTE_UNUSED) {
995 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
996
Daniel Veillardb3edafd2008-01-11 08:00:57 +0000997 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +0000998 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
999 }
1000}
1001
1002/**
1003 * xmlCleanSpecialAttr:
1004 * @ctxt: an XML parser context
1005 *
1006 * Trim the list of attributes defined to remove all those of type
1007 * CDATA as they are not special. This call should be done when finishing
1008 * to parse the DTD and before starting to parse the document root.
1009 */
1010static void
1011xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1012{
1013 if (ctxt->attsSpecial == NULL)
1014 return;
1015
1016 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1017
1018 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1019 xmlHashFree(ctxt->attsSpecial, NULL);
1020 ctxt->attsSpecial = NULL;
1021 }
1022 return;
1023}
1024
1025/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001026 * xmlCheckLanguageID:
1027 * @lang: pointer to the string value
1028 *
1029 * Checks that the value conforms to the LanguageID production:
1030 *
1031 * NOTE: this is somewhat deprecated, those productions were removed from
1032 * the XML Second edition.
1033 *
1034 * [33] LanguageID ::= Langcode ('-' Subcode)*
1035 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1036 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1037 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1038 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1039 * [38] Subcode ::= ([a-z] | [A-Z])+
1040 *
1041 * Returns 1 if correct 0 otherwise
1042 **/
1043int
1044xmlCheckLanguageID(const xmlChar * lang)
1045{
1046 const xmlChar *cur = lang;
1047
1048 if (cur == NULL)
1049 return (0);
1050 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1051 ((cur[0] == 'I') && (cur[1] == '-'))) {
1052 /*
1053 * IANA code
1054 */
1055 cur += 2;
1056 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1057 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1058 cur++;
1059 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1060 ((cur[0] == 'X') && (cur[1] == '-'))) {
1061 /*
1062 * User code
1063 */
1064 cur += 2;
1065 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1066 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1067 cur++;
1068 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1069 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1070 /*
1071 * ISO639
1072 */
1073 cur++;
1074 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1075 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1076 cur++;
1077 else
1078 return (0);
1079 } else
1080 return (0);
1081 while (cur[0] != 0) { /* non input consuming */
1082 if (cur[0] != '-')
1083 return (0);
1084 cur++;
1085 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1086 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1087 cur++;
1088 else
1089 return (0);
1090 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1091 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1092 cur++;
1093 }
1094 return (1);
1095}
1096
Owen Taylor3473f882001-02-23 17:55:21 +00001097/************************************************************************
1098 * *
1099 * Parser stacks related functions and macros *
1100 * *
1101 ************************************************************************/
1102
1103xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1104 const xmlChar ** str);
1105
Daniel Veillard0fb18932003-09-07 09:14:37 +00001106#ifdef SAX2
1107/**
1108 * nsPush:
1109 * @ctxt: an XML parser context
1110 * @prefix: the namespace prefix or NULL
1111 * @URL: the namespace name
1112 *
1113 * Pushes a new parser namespace on top of the ns stack
1114 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001115 * Returns -1 in case of error, -2 if the namespace should be discarded
1116 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001117 */
1118static int
1119nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1120{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001121 if (ctxt->options & XML_PARSE_NSCLEAN) {
1122 int i;
1123 for (i = 0;i < ctxt->nsNr;i += 2) {
1124 if (ctxt->nsTab[i] == prefix) {
1125 /* in scope */
1126 if (ctxt->nsTab[i + 1] == URL)
1127 return(-2);
1128 /* out of scope keep it */
1129 break;
1130 }
1131 }
1132 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001133 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1134 ctxt->nsMax = 10;
1135 ctxt->nsNr = 0;
1136 ctxt->nsTab = (const xmlChar **)
1137 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1138 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001139 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->nsMax = 0;
1141 return (-1);
1142 }
1143 } else if (ctxt->nsNr >= ctxt->nsMax) {
1144 ctxt->nsMax *= 2;
1145 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001146 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001147 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1148 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001149 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 ctxt->nsMax /= 2;
1151 return (-1);
1152 }
1153 }
1154 ctxt->nsTab[ctxt->nsNr++] = prefix;
1155 ctxt->nsTab[ctxt->nsNr++] = URL;
1156 return (ctxt->nsNr);
1157}
1158/**
1159 * nsPop:
1160 * @ctxt: an XML parser context
1161 * @nr: the number to pop
1162 *
1163 * Pops the top @nr parser prefix/namespace from the ns stack
1164 *
1165 * Returns the number of namespaces removed
1166 */
1167static int
1168nsPop(xmlParserCtxtPtr ctxt, int nr)
1169{
1170 int i;
1171
1172 if (ctxt->nsTab == NULL) return(0);
1173 if (ctxt->nsNr < nr) {
1174 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1175 nr = ctxt->nsNr;
1176 }
1177 if (ctxt->nsNr <= 0)
1178 return (0);
1179
1180 for (i = 0;i < nr;i++) {
1181 ctxt->nsNr--;
1182 ctxt->nsTab[ctxt->nsNr] = NULL;
1183 }
1184 return(nr);
1185}
1186#endif
1187
1188static int
1189xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1190 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001191 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001192 int maxatts;
1193
1194 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001195 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001196 atts = (const xmlChar **)
1197 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001199 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001200 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1201 if (attallocs == NULL) goto mem_error;
1202 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001203 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001204 } else if (nr + 5 > ctxt->maxatts) {
1205 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001206 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1207 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001208 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001209 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001210 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1211 (maxatts / 5) * sizeof(int));
1212 if (attallocs == NULL) goto mem_error;
1213 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001214 ctxt->maxatts = maxatts;
1215 }
1216 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001217mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001218 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001219 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001220}
1221
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001222/**
1223 * inputPush:
1224 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001225 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001226 *
1227 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001228 *
1229 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001230 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001231int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001232inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1233{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001234 if ((ctxt == NULL) || (value == NULL))
1235 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 if (ctxt->inputNr >= ctxt->inputMax) {
1237 ctxt->inputMax *= 2;
1238 ctxt->inputTab =
1239 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1240 ctxt->inputMax *
1241 sizeof(ctxt->inputTab[0]));
1242 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001243 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001244 return (0);
1245 }
1246 }
1247 ctxt->inputTab[ctxt->inputNr] = value;
1248 ctxt->input = value;
1249 return (ctxt->inputNr++);
1250}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001251/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001252 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001253 * @ctxt: an XML parser context
1254 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001255 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001256 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001257 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001258 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001259xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260inputPop(xmlParserCtxtPtr ctxt)
1261{
1262 xmlParserInputPtr ret;
1263
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001264 if (ctxt == NULL)
1265 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001266 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001267 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->inputNr--;
1269 if (ctxt->inputNr > 0)
1270 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1271 else
1272 ctxt->input = NULL;
1273 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001274 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001275 return (ret);
1276}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001277/**
1278 * nodePush:
1279 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001280 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001281 *
1282 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001283 *
1284 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001285 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001286int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001287nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1288{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001289 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001290 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001291 xmlNodePtr *tmp;
1292
1293 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1294 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001296 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001297 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001298 return (0);
1299 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001300 ctxt->nodeTab = tmp;
1301 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001302 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001303 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001304 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001305 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1306 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001307 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001308 return(0);
1309 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001310 ctxt->nodeTab[ctxt->nodeNr] = value;
1311 ctxt->node = value;
1312 return (ctxt->nodeNr++);
1313}
1314/**
1315 * nodePop:
1316 * @ctxt: an XML parser context
1317 *
1318 * Pops the top element node from the node stack
1319 *
1320 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001321 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001322xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001323nodePop(xmlParserCtxtPtr ctxt)
1324{
1325 xmlNodePtr ret;
1326
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001327 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001328 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001329 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001330 ctxt->nodeNr--;
1331 if (ctxt->nodeNr > 0)
1332 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1333 else
1334 ctxt->node = NULL;
1335 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001336 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001337 return (ret);
1338}
Daniel Veillarda2351322004-06-27 12:08:10 +00001339
1340#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001341/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001342 * nameNsPush:
1343 * @ctxt: an XML parser context
1344 * @value: the element name
1345 * @prefix: the element prefix
1346 * @URI: the element namespace name
1347 *
1348 * Pushes a new element name/prefix/URL on top of the name stack
1349 *
1350 * Returns -1 in case of error, the index in the stack otherwise
1351 */
1352static int
1353nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1354 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1355{
1356 if (ctxt->nameNr >= ctxt->nameMax) {
1357 const xmlChar * *tmp;
1358 void **tmp2;
1359 ctxt->nameMax *= 2;
1360 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1361 ctxt->nameMax *
1362 sizeof(ctxt->nameTab[0]));
1363 if (tmp == NULL) {
1364 ctxt->nameMax /= 2;
1365 goto mem_error;
1366 }
1367 ctxt->nameTab = tmp;
1368 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1369 ctxt->nameMax * 3 *
1370 sizeof(ctxt->pushTab[0]));
1371 if (tmp2 == NULL) {
1372 ctxt->nameMax /= 2;
1373 goto mem_error;
1374 }
1375 ctxt->pushTab = tmp2;
1376 }
1377 ctxt->nameTab[ctxt->nameNr] = value;
1378 ctxt->name = value;
1379 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1380 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001381 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001382 return (ctxt->nameNr++);
1383mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001384 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001385 return (-1);
1386}
1387/**
1388 * nameNsPop:
1389 * @ctxt: an XML parser context
1390 *
1391 * Pops the top element/prefix/URI name from the name stack
1392 *
1393 * Returns the name just removed
1394 */
1395static const xmlChar *
1396nameNsPop(xmlParserCtxtPtr ctxt)
1397{
1398 const xmlChar *ret;
1399
1400 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001401 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 ctxt->nameNr--;
1403 if (ctxt->nameNr > 0)
1404 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1405 else
1406 ctxt->name = NULL;
1407 ret = ctxt->nameTab[ctxt->nameNr];
1408 ctxt->nameTab[ctxt->nameNr] = NULL;
1409 return (ret);
1410}
Daniel Veillarda2351322004-06-27 12:08:10 +00001411#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001412
1413/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001414 * namePush:
1415 * @ctxt: an XML parser context
1416 * @value: the element name
1417 *
1418 * Pushes a new element name on top of the name stack
1419 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001420 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001421 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001422int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001423namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001424{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001425 if (ctxt == NULL) return (-1);
1426
Daniel Veillard1c732d22002-11-30 11:22:59 +00001427 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001428 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001429 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001430 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001431 ctxt->nameMax *
1432 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001433 if (tmp == NULL) {
1434 ctxt->nameMax /= 2;
1435 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001436 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001437 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001438 }
1439 ctxt->nameTab[ctxt->nameNr] = value;
1440 ctxt->name = value;
1441 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001442mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001443 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001444 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445}
1446/**
1447 * namePop:
1448 * @ctxt: an XML parser context
1449 *
1450 * Pops the top element name from the name stack
1451 *
1452 * Returns the name just removed
1453 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001454const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001455namePop(xmlParserCtxtPtr ctxt)
1456{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001457 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001458
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001459 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1460 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001461 ctxt->nameNr--;
1462 if (ctxt->nameNr > 0)
1463 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1464 else
1465 ctxt->name = NULL;
1466 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001467 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001468 return (ret);
1469}
Owen Taylor3473f882001-02-23 17:55:21 +00001470
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001471static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001472 if (ctxt->spaceNr >= ctxt->spaceMax) {
1473 ctxt->spaceMax *= 2;
1474 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1475 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1476 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001477 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001478 return(0);
1479 }
1480 }
1481 ctxt->spaceTab[ctxt->spaceNr] = val;
1482 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1483 return(ctxt->spaceNr++);
1484}
1485
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001486static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001487 int ret;
1488 if (ctxt->spaceNr <= 0) return(0);
1489 ctxt->spaceNr--;
1490 if (ctxt->spaceNr > 0)
1491 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1492 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001493 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001494 ret = ctxt->spaceTab[ctxt->spaceNr];
1495 ctxt->spaceTab[ctxt->spaceNr] = -1;
1496 return(ret);
1497}
1498
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1533
/* Byte at the read cursor of the current input. */
#define RAW (ctxt->input->cur[0])
/* Same expansion as RAW. */
#define CUR (ctxt->input->cur[0])
/* Byte located (val) positions past the read cursor. */
#define NXT(val) ctxt->input->cur[(val)]
/* The read cursor itself; usable as an lvalue. */
#define CUR_PTR ctxt->input->cur
1538
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at s are
 * exactly c1 ... cn.  Bytes are read as unsigned char and compared left to
 * right; evaluation stops at the first mismatch.
 *
 * Every argument is parenthesized so that compound expressions (pointer
 * arithmetic, conditional expressions, ...) can be passed safely, and the
 * cast keeps the const qualification of s.  Note that s is expanded once
 * per compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1556
/*
 * SKIP(val): move the read cursor forward by (val) bytes, adding the same
 * amount to ctxt->nbChars and to the column counter (the line counter is
 * not touched).  Afterwards a '%' at the cursor is handed to
 * xmlParserHandlePEReference(), and if the cursor sits on a 0 byte and
 * xmlParserInputGrow() brings nothing in, xmlPopInput() is called.
 * (val) is expanded three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if (*ctxt->input->cur == 0) {					\
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)		\
            xmlPopInput(ctxt);						\
    }									\
  } while (0)
1564
/*
 * SKIPL(val): like SKIP(val) but advances one byte at a time so that a
 * '\n' under the cursor bumps the line counter and resets the column to 1;
 * any other byte increments the column.  ctxt->nbChars is incremented for
 * every byte skipped.  Once done, a '%' at the cursor is handed to
 * xmlParserHandlePEReference(), and if the cursor sits on a 0 byte and
 * xmlParserInputGrow() brings nothing in, xmlPopInput() is called.
 *
 * (val) is parenthesized so that compound expressions can be passed; it is
 * still re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1579
Daniel Veillarda880b122003-04-21 21:36:41 +00001580#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001581 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1582 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001583 xmlSHRINK (ctxt);
1584
1585static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1586 xmlParserInputShrink(ctxt->input);
1587 if ((*ctxt->input->cur == 0) &&
1588 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1589 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001590 }
Owen Taylor3473f882001-02-23 17:55:21 +00001591
Daniel Veillarda880b122003-04-21 21:36:41 +00001592#define GROW if ((ctxt->progressive == 0) && \
1593 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001594 xmlGROW (ctxt);
1595
1596static void xmlGROW (xmlParserCtxtPtr ctxt) {
1597 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1598 if ((*ctxt->input->cur == 0) &&
1599 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1600 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001601}
Owen Taylor3473f882001-02-23 17:55:21 +00001602
/* Skip blanks at the cursor; expands to a call to xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character; expands to a call to xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor by exactly one byte, incrementing the column
 * counter and ctxt->nbChars (the line counter is not touched).  If the
 * cursor lands on a 0 byte, xmlParserInputGrow() is called; the input is
 * never popped here.  Expands to a bare { } block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance the cursor by (l) bytes, counted as one character: a
 * '\n' under the cursor bumps the line counter and resets the column to 1,
 * anything else increments the column.  ctxt->nbChars is not updated.
 * A '%' at the new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context input; (l) receives its byte length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same, reading from the string (s) instead of the context input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character (v) to buffer (b) at index (i) and
 * advance (i).  When (l) is 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and its return value is added
 * to (i).  Expands to an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001629
1630/**
1631 * xmlSkipBlankChars:
1632 * @ctxt: the XML parser context
1633 *
1634 * skip all blanks character found at that point in the input streams.
1635 * It pops up finished entities in the process if allowable at that point.
1636 *
1637 * Returns the number of space chars skipped
1638 */
1639
1640int
1641xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001642 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001643
1644 /*
1645 * It's Okay to use CUR/NEXT here since all the blanks are on
1646 * the ASCII range.
1647 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001648 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1649 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001650 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001651 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001652 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001653 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001654 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001655 if (*cur == '\n') {
1656 ctxt->input->line++; ctxt->input->col = 1;
1657 }
1658 cur++;
1659 res++;
1660 if (*cur == 0) {
1661 ctxt->input->cur = cur;
1662 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1663 cur = ctxt->input->cur;
1664 }
1665 }
1666 ctxt->input->cur = cur;
1667 } else {
1668 int cur;
1669 do {
1670 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001671 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001672 NEXT;
1673 cur = CUR;
1674 res++;
1675 }
1676 while ((cur == 0) && (ctxt->inputNr > 1) &&
1677 (ctxt->instate != XML_PARSER_COMMENT)) {
1678 xmlPopInput(ctxt);
1679 cur = CUR;
1680 }
1681 /*
1682 * Need to handle support of entities branching here
1683 */
1684 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1685 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1686 }
Owen Taylor3473f882001-02-23 17:55:21 +00001687 return(res);
1688}
1689
1690/************************************************************************
1691 * *
1692 * Commodity functions to handle entities *
1693 * *
1694 ************************************************************************/
1695
1696/**
1697 * xmlPopInput:
1698 * @ctxt: an XML parser context
1699 *
1700 * xmlPopInput: the current input pointed by ctxt->input came to an end
1701 * pop it and return the next char.
1702 *
1703 * Returns the current xmlChar in the parser context
1704 */
1705xmlChar
1706xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001707 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001708 if (xmlParserDebugEntities)
1709 xmlGenericError(xmlGenericErrorContext,
1710 "Popping input %d\n", ctxt->inputNr);
1711 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001712 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001713 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1714 return(xmlPopInput(ctxt));
1715 return(CUR);
1716}
1717
1718/**
1719 * xmlPushInput:
1720 * @ctxt: an XML parser context
1721 * @input: an XML parser input fragment (entity, XML fragment ...).
1722 *
1723 * xmlPushInput: switch to a new input stream which is stacked on top
1724 * of the previous one(s).
1725 */
1726void
1727xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1728 if (input == NULL) return;
1729
1730 if (xmlParserDebugEntities) {
1731 if ((ctxt->input != NULL) && (ctxt->input->filename))
1732 xmlGenericError(xmlGenericErrorContext,
1733 "%s(%d): ", ctxt->input->filename,
1734 ctxt->input->line);
1735 xmlGenericError(xmlGenericErrorContext,
1736 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1737 }
1738 inputPush(ctxt, input);
1739 GROW;
1740}
1741
1742/**
1743 * xmlParseCharRef:
1744 * @ctxt: an XML parser context
1745 *
1746 * parse Reference declarations
1747 *
1748 * [66] CharRef ::= '&#' [0-9]+ ';' |
1749 * '&#x' [0-9a-fA-F]+ ';'
1750 *
1751 * [ WFC: Legal Character ]
1752 * Characters referred to using character references must match the
1753 * production for Char.
1754 *
1755 * Returns the value parsed (as an int), 0 in case of error
1756 */
1757int
1758xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001759 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001761 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001762
Owen Taylor3473f882001-02-23 17:55:21 +00001763 /*
1764 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1765 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001766 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001767 (NXT(2) == 'x')) {
1768 SKIP(3);
1769 GROW;
1770 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001771 if (count++ > 20) {
1772 count = 0;
1773 GROW;
1774 }
1775 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001776 val = val * 16 + (CUR - '0');
1777 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1778 val = val * 16 + (CUR - 'a') + 10;
1779 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1780 val = val * 16 + (CUR - 'A') + 10;
1781 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001782 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001783 val = 0;
1784 break;
1785 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001786 if (val > 0x10FFFF)
1787 outofrange = val;
1788
Owen Taylor3473f882001-02-23 17:55:21 +00001789 NEXT;
1790 count++;
1791 }
1792 if (RAW == ';') {
1793 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001794 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001795 ctxt->nbChars ++;
1796 ctxt->input->cur++;
1797 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001798 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001799 SKIP(2);
1800 GROW;
1801 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001802 if (count++ > 20) {
1803 count = 0;
1804 GROW;
1805 }
1806 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001807 val = val * 10 + (CUR - '0');
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 NEXT;
1817 count++;
1818 }
1819 if (RAW == ';') {
1820 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001821 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001822 ctxt->nbChars ++;
1823 ctxt->input->cur++;
1824 }
1825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001827 }
1828
1829 /*
1830 * [ WFC: Legal Character ]
1831 * Characters referred to using character references must match the
1832 * production for Char.
1833 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001834 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001835 return(val);
1836 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001837 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1838 "xmlParseCharRef: invalid xmlChar value %d\n",
1839 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841 return(0);
1842}
1843
1844/**
1845 * xmlParseStringCharRef:
1846 * @ctxt: an XML parser context
1847 * @str: a pointer to an index in the string
1848 *
1849 * parse Reference declarations, variant parsing from a string rather
1850 * than an an input flow.
1851 *
1852 * [66] CharRef ::= '&#' [0-9]+ ';' |
1853 * '&#x' [0-9a-fA-F]+ ';'
1854 *
1855 * [ WFC: Legal Character ]
1856 * Characters referred to using character references must match the
1857 * production for Char.
1858 *
1859 * Returns the value parsed (as an int), 0 in case of error, str will be
1860 * updated to the current value of the index
1861 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001862static int
Owen Taylor3473f882001-02-23 17:55:21 +00001863xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1864 const xmlChar *ptr;
1865 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001866 unsigned int val = 0;
1867 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001868
1869 if ((str == NULL) || (*str == NULL)) return(0);
1870 ptr = *str;
1871 cur = *ptr;
1872 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1873 ptr += 3;
1874 cur = *ptr;
1875 while (cur != ';') { /* Non input consuming loop */
1876 if ((cur >= '0') && (cur <= '9'))
1877 val = val * 16 + (cur - '0');
1878 else if ((cur >= 'a') && (cur <= 'f'))
1879 val = val * 16 + (cur - 'a') + 10;
1880 else if ((cur >= 'A') && (cur <= 'F'))
1881 val = val * 16 + (cur - 'A') + 10;
1882 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001883 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001884 val = 0;
1885 break;
1886 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001887 if (val > 0x10FFFF)
1888 outofrange = val;
1889
Owen Taylor3473f882001-02-23 17:55:21 +00001890 ptr++;
1891 cur = *ptr;
1892 }
1893 if (cur == ';')
1894 ptr++;
1895 } else if ((cur == '&') && (ptr[1] == '#')){
1896 ptr += 2;
1897 cur = *ptr;
1898 while (cur != ';') { /* Non input consuming loops */
1899 if ((cur >= '0') && (cur <= '9'))
1900 val = val * 10 + (cur - '0');
1901 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001902 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 val = 0;
1904 break;
1905 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001906 if (val > 0x10FFFF)
1907 outofrange = val;
1908
Owen Taylor3473f882001-02-23 17:55:21 +00001909 ptr++;
1910 cur = *ptr;
1911 }
1912 if (cur == ';')
1913 ptr++;
1914 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001915 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001916 return(0);
1917 }
1918 *str = ptr;
1919
1920 /*
1921 * [ WFC: Legal Character ]
1922 * Characters referred to using character references must match the
1923 * production for Char.
1924 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001925 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001926 return(val);
1927 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001928 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1929 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1930 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001931 }
1932 return(0);
1933}
1934
1935/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001936 * xmlNewBlanksWrapperInputStream:
1937 * @ctxt: an XML parser context
1938 * @entity: an Entity pointer
1939 *
1940 * Create a new input stream for wrapping
1941 * blanks around a PEReference
1942 *
1943 * Returns the new input stream or NULL
1944 */
1945
1946static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1947
Daniel Veillardf4862f02002-09-10 11:13:43 +00001948static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001949xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1950 xmlParserInputPtr input;
1951 xmlChar *buffer;
1952 size_t length;
1953 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001954 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1955 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new blanks wrapper for entity: %s\n", entity->name);
1961 input = xmlNewInputStream(ctxt);
1962 if (input == NULL) {
1963 return(NULL);
1964 }
1965 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001966 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001967 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001968 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001969 return(NULL);
1970 }
1971 buffer [0] = ' ';
1972 buffer [1] = '%';
1973 buffer [length-3] = ';';
1974 buffer [length-2] = ' ';
1975 buffer [length-1] = 0;
1976 memcpy(buffer + 2, entity->name, length - 5);
1977 input->free = deallocblankswrapper;
1978 input->base = buffer;
1979 input->cur = buffer;
1980 input->length = length;
1981 input->end = &buffer[length];
1982 return(input);
1983}
1984
1985/**
Owen Taylor3473f882001-02-23 17:55:21 +00001986 * xmlParserHandlePEReference:
1987 * @ctxt: the parser context
1988 *
1989 * [69] PEReference ::= '%' Name ';'
1990 *
1991 * [ WFC: No Recursion ]
1992 * A parsed entity must not contain a recursive
1993 * reference to itself, either directly or indirectly.
1994 *
1995 * [ WFC: Entity Declared ]
1996 * In a document without any DTD, a document with only an internal DTD
1997 * subset which contains no parameter entity references, or a document
1998 * with "standalone='yes'", ... ... The declaration of a parameter
1999 * entity must precede any reference to it...
2000 *
2001 * [ VC: Entity Declared ]
2002 * In a document with an external subset or external parameter entities
2003 * with "standalone='no'", ... ... The declaration of a parameter entity
2004 * must precede any reference to it...
2005 *
2006 * [ WFC: In DTD ]
2007 * Parameter-entity references may only appear in the DTD.
2008 * NOTE: misleading but this is handled.
2009 *
2010 * A PEReference may have been detected in the current input stream
2011 * the handling is done accordingly to
2012 * http://www.w3.org/TR/REC-xml#entproc
2013 * i.e.
2014 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002015 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002016 */
2017void
2018xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002019 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002020 xmlEntityPtr entity = NULL;
2021 xmlParserInputPtr input;
2022
Owen Taylor3473f882001-02-23 17:55:21 +00002023 if (RAW != '%') return;
2024 switch(ctxt->instate) {
2025 case XML_PARSER_CDATA_SECTION:
2026 return;
2027 case XML_PARSER_COMMENT:
2028 return;
2029 case XML_PARSER_START_TAG:
2030 return;
2031 case XML_PARSER_END_TAG:
2032 return;
2033 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002034 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return;
2036 case XML_PARSER_PROLOG:
2037 case XML_PARSER_START:
2038 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 return;
2041 case XML_PARSER_ENTITY_DECL:
2042 case XML_PARSER_CONTENT:
2043 case XML_PARSER_ATTRIBUTE_VALUE:
2044 case XML_PARSER_PI:
2045 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002046 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002047 /* we just ignore it there */
2048 return;
2049 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002050 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002051 return;
2052 case XML_PARSER_ENTITY_VALUE:
2053 /*
2054 * NOTE: in the case of entity values, we don't do the
2055 * substitution here since we need the literal
2056 * entity value to be able to save the internal
2057 * subset of the document.
2058 * This will be handled by xmlStringDecodeEntities
2059 */
2060 return;
2061 case XML_PARSER_DTD:
2062 /*
2063 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2064 * In the internal DTD subset, parameter-entity references
2065 * can occur only where markup declarations can occur, not
2066 * within markup declarations.
2067 * In that case this is handled in xmlParseMarkupDecl
2068 */
2069 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2070 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002071 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002072 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002073 break;
2074 case XML_PARSER_IGNORE:
2075 return;
2076 }
2077
2078 NEXT;
2079 name = xmlParseName(ctxt);
2080 if (xmlParserDebugEntities)
2081 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002082 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002083 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002084 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002085 } else {
2086 if (RAW == ';') {
2087 NEXT;
2088 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2089 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2090 if (entity == NULL) {
2091
2092 /*
2093 * [ WFC: Entity Declared ]
2094 * In a document without any DTD, a document with only an
2095 * internal DTD subset which contains no parameter entity
2096 * references, or a document with "standalone='yes'", ...
2097 * ... The declaration of a parameter entity must precede
2098 * any reference to it...
2099 */
2100 if ((ctxt->standalone == 1) ||
2101 ((ctxt->hasExternalSubset == 0) &&
2102 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002103 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002104 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002105 } else {
2106 /*
2107 * [ VC: Entity Declared ]
2108 * In a document with an external subset or external
2109 * parameter entities with "standalone='no'", ...
2110 * ... The declaration of a parameter entity must precede
2111 * any reference to it...
2112 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002113 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2114 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2115 "PEReference: %%%s; not found\n",
2116 name);
2117 } else
2118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2119 "PEReference: %%%s; not found\n",
2120 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002121 ctxt->valid = 0;
2122 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002123 } else if (ctxt->input->free != deallocblankswrapper) {
2124 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2125 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002126 } else {
2127 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2128 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002129 xmlChar start[4];
2130 xmlCharEncoding enc;
2131
Owen Taylor3473f882001-02-23 17:55:21 +00002132 /*
2133 * handle the extra spaces added before and after
2134 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002135 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002136 */
2137 input = xmlNewEntityInputStream(ctxt, entity);
2138 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002139
2140 /*
2141 * Get the 4 first bytes and decode the charset
2142 * if enc != XML_CHAR_ENCODING_NONE
2143 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002144 * Note that, since we may have some non-UTF8
2145 * encoding (like UTF16, bug 135229), the 'length'
2146 * is not known, but we can calculate based upon
2147 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002148 */
2149 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002150 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002151 start[0] = RAW;
2152 start[1] = NXT(1);
2153 start[2] = NXT(2);
2154 start[3] = NXT(3);
2155 enc = xmlDetectCharEncoding(start, 4);
2156 if (enc != XML_CHAR_ENCODING_NONE) {
2157 xmlSwitchEncoding(ctxt, enc);
2158 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002159 }
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002162 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2163 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002164 xmlParseTextDecl(ctxt);
2165 }
Owen Taylor3473f882001-02-23 17:55:21 +00002166 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002167 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2168 "PEReference: %s is not a parameter entity\n",
2169 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002170 }
2171 }
2172 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002173 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002174 }
Owen Taylor3473f882001-02-23 17:55:21 +00002175 }
2176}
2177
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer to that
 * many xmlChars.  On allocation failure it jumps to the mem_error label of
 * the enclosing function, leaving buffer itself pointing at the old block.
 * Expands to a bare { } block.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size = buffer##_size * 2;					\
    tmp = (xmlChar *)							\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (tmp != NULL)							\
        buffer = tmp;							\
    else								\
        goto mem_error;							\
}
2189
2190/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002191 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002192 * @ctxt: the parser context
2193 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002194 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002195 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2196 * @end: an end marker xmlChar, 0 if none
2197 * @end2: an end marker xmlChar, 0 if none
2198 * @end3: an end marker xmlChar, 0 if none
2199 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002200 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002201 *
2202 * [67] Reference ::= EntityRef | CharRef
2203 *
2204 * [69] PEReference ::= '%' Name ';'
2205 *
2206 * Returns A newly allocated string with the substitution done. The caller
2207 * must deallocate it !
2208 */
2209xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002210xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2211 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002212 xmlChar *buffer = NULL;
2213 int buffer_size = 0;
2214
2215 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002216 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 xmlEntityPtr ent;
2218 int c,l;
2219 int nbchars = 0;
2220
Daniel Veillarda82b1822004-11-08 16:24:57 +00002221 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002222 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002223 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002224
2225 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229
2230 /*
2231 * allocate a translation buffer.
2232 */
2233 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002234 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002235 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002236
2237 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002238 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002239 * we are operating on already parsed values.
2240 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002241 if (str < last)
2242 c = CUR_SCHAR(str, l);
2243 else
2244 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002245 while ((c != 0) && (c != end) && /* non input consuming loop */
2246 (c != end2) && (c != end3)) {
2247
2248 if (c == 0) break;
2249 if ((c == '&') && (str[1] == '#')) {
2250 int val = xmlParseStringCharRef(ctxt, &str);
2251 if (val != 0) {
2252 COPY_BUF(0,buffer,nbchars,val);
2253 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002254 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2255 growBuffer(buffer);
2256 }
Owen Taylor3473f882001-02-23 17:55:21 +00002257 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2258 if (xmlParserDebugEntities)
2259 xmlGenericError(xmlGenericErrorContext,
2260 "String decoding Entity Reference: %.30s\n",
2261 str);
2262 ent = xmlParseStringEntityRef(ctxt, &str);
2263 if ((ent != NULL) &&
2264 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2265 if (ent->content != NULL) {
2266 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002267 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2268 growBuffer(buffer);
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002271 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2272 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002273 }
2274 } else if ((ent != NULL) && (ent->content != NULL)) {
2275 xmlChar *rep;
2276
2277 ctxt->depth++;
2278 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2279 0, 0, 0);
2280 ctxt->depth--;
2281 if (rep != NULL) {
2282 current = rep;
2283 while (*current != 0) { /* non input consuming loop */
2284 buffer[nbchars++] = *current++;
2285 if (nbchars >
2286 buffer_size - XML_PARSER_BUFFER_SIZE) {
2287 growBuffer(buffer);
2288 }
2289 }
2290 xmlFree(rep);
2291 }
2292 } else if (ent != NULL) {
2293 int i = xmlStrlen(ent->name);
2294 const xmlChar *cur = ent->name;
2295
2296 buffer[nbchars++] = '&';
2297 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2298 growBuffer(buffer);
2299 }
2300 for (;i > 0;i--)
2301 buffer[nbchars++] = *cur++;
2302 buffer[nbchars++] = ';';
2303 }
2304 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2305 if (xmlParserDebugEntities)
2306 xmlGenericError(xmlGenericErrorContext,
2307 "String decoding PE Reference: %.30s\n", str);
2308 ent = xmlParseStringPEReference(ctxt, &str);
2309 if (ent != NULL) {
2310 xmlChar *rep;
2311
2312 ctxt->depth++;
2313 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2314 0, 0, 0);
2315 ctxt->depth--;
2316 if (rep != NULL) {
2317 current = rep;
2318 while (*current != 0) { /* non input consuming loop */
2319 buffer[nbchars++] = *current++;
2320 if (nbchars >
2321 buffer_size - XML_PARSER_BUFFER_SIZE) {
2322 growBuffer(buffer);
2323 }
2324 }
2325 xmlFree(rep);
2326 }
2327 }
2328 } else {
2329 COPY_BUF(l,buffer,nbchars,c);
2330 str += l;
2331 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2332 growBuffer(buffer);
2333 }
2334 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002335 if (str < last)
2336 c = CUR_SCHAR(str, l);
2337 else
2338 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 }
2340 buffer[nbchars++] = 0;
2341 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002342
2343mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002345 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002346}
2347
Daniel Veillarde57ec792003-09-10 10:50:59 +00002348/**
2349 * xmlStringDecodeEntities:
2350 * @ctxt: the parser context
2351 * @str: the input string
2352 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2353 * @end: an end marker xmlChar, 0 if none
2354 * @end2: an end marker xmlChar, 0 if none
2355 * @end3: an end marker xmlChar, 0 if none
2356 *
2357 * Takes a entity string content and process to do the adequate substitutions.
2358 *
2359 * [67] Reference ::= EntityRef | CharRef
2360 *
2361 * [69] PEReference ::= '%' Name ';'
2362 *
2363 * Returns A newly allocated string with the substitution done. The caller
2364 * must deallocate it !
2365 */
2366xmlChar *
2367xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2368 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002369 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002370 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2371 end, end2, end3));
2372}
Owen Taylor3473f882001-02-23 17:55:21 +00002373
2374/************************************************************************
2375 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002376 * Commodity functions, cleanup needed ? *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * areBlanks:
2382 * @ctxt: an XML parser context
2383 * @str: a xmlChar *
2384 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002385 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002386 *
2387 * Is this a sequence of blank chars that one can ignore ?
2388 *
2389 * Returns 1 if ignorable 0 otherwise.
2390 */
2391
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2393 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002394 int i, ret;
2395 xmlNodePtr lastChild;
2396
Daniel Veillard05c13a22001-09-09 08:38:09 +00002397 /*
2398 * Don't spend time trying to differentiate them, the same callback is
2399 * used !
2400 */
2401 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002402 return(0);
2403
Owen Taylor3473f882001-02-23 17:55:21 +00002404 /*
2405 * Check for xml:space value.
2406 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002407 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2408 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002409 return(0);
2410
2411 /*
2412 * Check that the string is made of blanks
2413 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002414 if (blank_chars == 0) {
2415 for (i = 0;i < len;i++)
2416 if (!(IS_BLANK_CH(str[i]))) return(0);
2417 }
Owen Taylor3473f882001-02-23 17:55:21 +00002418
2419 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002420 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002421 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002422 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002423 if (ctxt->myDoc != NULL) {
2424 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2425 if (ret == 0) return(1);
2426 if (ret == 1) return(0);
2427 }
2428
2429 /*
2430 * Otherwise, heuristic :-\
2431 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002432 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002433 if ((ctxt->node->children == NULL) &&
2434 (RAW == '<') && (NXT(1) == '/')) return(0);
2435
2436 lastChild = xmlGetLastChild(ctxt->node);
2437 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002438 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2439 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002440 } else if (xmlNodeIsText(lastChild))
2441 return(0);
2442 else if ((ctxt->node->children != NULL) &&
2443 (xmlNodeIsText(ctxt->node->children)))
2444 return(0);
2445 return(1);
2446}
2447
Owen Taylor3473f882001-02-23 17:55:21 +00002448/************************************************************************
2449 * *
2450 * Extra stuff for namespace support *
2451 * Relates to http://www.w3.org/TR/WD-xml-names *
2452 * *
2453 ************************************************************************/
2454
2455/**
2456 * xmlSplitQName:
2457 * @ctxt: an XML parser context
2458 * @name: an XML parser context
2459 * @prefix: a xmlChar **
2460 *
2461 * parse an UTF8 encoded XML qualified name string
2462 *
2463 * [NS 5] QName ::= (Prefix ':')? LocalPart
2464 *
2465 * [NS 6] Prefix ::= NCName
2466 *
2467 * [NS 7] LocalPart ::= NCName
2468 *
2469 * Returns the local part, and prefix is updated
2470 * to get the Prefix if any.
2471 */
2472
2473xmlChar *
2474xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2475 xmlChar buf[XML_MAX_NAMELEN + 5];
2476 xmlChar *buffer = NULL;
2477 int len = 0;
2478 int max = XML_MAX_NAMELEN;
2479 xmlChar *ret = NULL;
2480 const xmlChar *cur = name;
2481 int c;
2482
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002483 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002484 *prefix = NULL;
2485
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002486 if (cur == NULL) return(NULL);
2487
Owen Taylor3473f882001-02-23 17:55:21 +00002488#ifndef XML_XML_NAMESPACE
2489 /* xml: prefix is not really a namespace */
2490 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2491 (cur[2] == 'l') && (cur[3] == ':'))
2492 return(xmlStrdup(name));
2493#endif
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002496 if (cur[0] == ':')
2497 return(xmlStrdup(name));
2498
2499 c = *cur++;
2500 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2501 buf[len++] = c;
2502 c = *cur++;
2503 }
2504 if (len >= max) {
2505 /*
2506 * Okay someone managed to make a huge name, so he's ready to pay
2507 * for the processing speed.
2508 */
2509 max = len * 2;
2510
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002511 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002512 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return(NULL);
2515 }
2516 memcpy(buffer, buf, len);
2517 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2518 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002519 xmlChar *tmp;
2520
Owen Taylor3473f882001-02-23 17:55:21 +00002521 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002522 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 if (tmp == NULL) {
2525 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002526 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002527 return(NULL);
2528 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002529 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 }
2531 buffer[len++] = c;
2532 c = *cur++;
2533 }
2534 buffer[len] = 0;
2535 }
2536
Daniel Veillard597bc482003-07-24 16:08:28 +00002537 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002538 if (buffer != NULL)
2539 xmlFree(buffer);
2540 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002541 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002542 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002543
Owen Taylor3473f882001-02-23 17:55:21 +00002544 if (buffer == NULL)
2545 ret = xmlStrndup(buf, len);
2546 else {
2547 ret = buffer;
2548 buffer = NULL;
2549 max = XML_MAX_NAMELEN;
2550 }
2551
2552
2553 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002554 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002555 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002556 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002557 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002558 }
Owen Taylor3473f882001-02-23 17:55:21 +00002559 len = 0;
2560
Daniel Veillardbb284f42002-10-16 18:02:47 +00002561 /*
2562 * Check that the first character is proper to start
2563 * a new name
2564 */
2565 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2566 ((c >= 0x41) && (c <= 0x5A)) ||
2567 (c == '_') || (c == ':'))) {
2568 int l;
2569 int first = CUR_SCHAR(cur, l);
2570
2571 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002572 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002573 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002574 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002575 }
2576 }
2577 cur++;
2578
Owen Taylor3473f882001-02-23 17:55:21 +00002579 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2580 buf[len++] = c;
2581 c = *cur++;
2582 }
2583 if (len >= max) {
2584 /*
2585 * Okay someone managed to make a huge name, so he's ready to pay
2586 * for the processing speed.
2587 */
2588 max = len * 2;
2589
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002590 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002591 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002592 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002593 return(NULL);
2594 }
2595 memcpy(buffer, buf, len);
2596 while (c != 0) { /* tested bigname2.xml */
2597 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002598 xmlChar *tmp;
2599
Owen Taylor3473f882001-02-23 17:55:21 +00002600 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002601 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002602 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002603 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002605 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002606 return(NULL);
2607 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002608 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002609 }
2610 buffer[len++] = c;
2611 c = *cur++;
2612 }
2613 buffer[len] = 0;
2614 }
2615
2616 if (buffer == NULL)
2617 ret = xmlStrndup(buf, len);
2618 else {
2619 ret = buffer;
2620 }
2621 }
2622
2623 return(ret);
2624}
2625
2626/************************************************************************
2627 * *
2628 * The parser itself *
2629 * Relates to http://www.w3.org/TR/REC-xml *
2630 * *
2631 ************************************************************************/
2632
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002633static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002634static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002635 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002636
Owen Taylor3473f882001-02-23 17:55:21 +00002637/**
2638 * xmlParseName:
2639 * @ctxt: an XML parser context
2640 *
2641 * parse an XML name.
2642 *
2643 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2644 * CombiningChar | Extender
2645 *
2646 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2647 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002648 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002649 *
2650 * Returns the Name parsed or NULL
2651 */
2652
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002653const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002654xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002655 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002656 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002657 int count = 0;
2658
2659 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002660
2661 /*
2662 * Accelerator for simple ASCII names
2663 */
2664 in = ctxt->input->cur;
2665 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2666 ((*in >= 0x41) && (*in <= 0x5A)) ||
2667 (*in == '_') || (*in == ':')) {
2668 in++;
2669 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2670 ((*in >= 0x41) && (*in <= 0x5A)) ||
2671 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002672 (*in == '_') || (*in == '-') ||
2673 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002674 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002675 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002676 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002677 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002678 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002679 ctxt->nbChars += count;
2680 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002681 if (ret == NULL)
2682 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002683 return(ret);
2684 }
2685 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002688
Daniel Veillard46de64e2002-05-29 08:21:33 +00002689/**
2690 * xmlParseNameAndCompare:
2691 * @ctxt: an XML parser context
2692 *
2693 * parse an XML name and compares for match
2694 * (specialized for endtag parsing)
2695 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002696 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2697 * and the name for mismatch
2698 */
2699
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002700static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002701xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002702 register const xmlChar *cmp = other;
2703 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002704 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002705
2706 GROW;
2707
2708 in = ctxt->input->cur;
2709 while (*in != 0 && *in == *cmp) {
2710 ++in;
2711 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002712 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002713 }
William M. Brack76e95df2003-10-18 16:20:14 +00002714 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002715 /* success */
2716 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002717 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002718 }
2719 /* failure (or end of input buffer), check with full function */
2720 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002721 /* strings coming from the dictionnary direct compare possible */
2722 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002723 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002724 }
2725 return ret;
2726}
2727
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002729xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002730 int len = 0, l;
2731 int c;
2732 int count = 0;
2733
2734 /*
2735 * Handler for more complex cases
2736 */
2737 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 c = CUR_CHAR(l);
2739 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2740 (!IS_LETTER(c) && (c != '_') &&
2741 (c != ':'))) {
2742 return(NULL);
2743 }
2744
2745 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002746 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002747 (c == '.') || (c == '-') ||
2748 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002749 (IS_COMBINING(c)) ||
2750 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002751 if (count++ > 100) {
2752 count = 0;
2753 GROW;
2754 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002755 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002756 NEXTL(l);
2757 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002758 }
Daniel Veillard96688262005-08-23 18:14:12 +00002759 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2760 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002761 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002762}
2763
2764/**
2765 * xmlParseStringName:
2766 * @ctxt: an XML parser context
2767 * @str: a pointer to the string pointer (IN/OUT)
2768 *
2769 * parse an XML name.
2770 *
2771 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2772 * CombiningChar | Extender
2773 *
2774 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2775 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002776 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002777 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002778 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002779 * is updated to the current location in the string.
2780 */
2781
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002782static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002783xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2784 xmlChar buf[XML_MAX_NAMELEN + 5];
2785 const xmlChar *cur = *str;
2786 int len = 0, l;
2787 int c;
2788
2789 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002790 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002791 (c != ':')) {
2792 return(NULL);
2793 }
2794
William M. Brack871611b2003-10-18 04:53:14 +00002795 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002796 (c == '.') || (c == '-') ||
2797 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002798 (IS_COMBINING(c)) ||
2799 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002800 COPY_BUF(l,buf,len,c);
2801 cur += l;
2802 c = CUR_SCHAR(cur, l);
2803 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2804 /*
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2807 */
2808 xmlChar *buffer;
2809 int max = len * 2;
2810
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002811 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002812 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002813 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002814 return(NULL);
2815 }
2816 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002817 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002818 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002819 (c == '.') || (c == '-') ||
2820 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002821 (IS_COMBINING(c)) ||
2822 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002823 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002824 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002825 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002826 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002827 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002828 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002829 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002831 return(NULL);
2832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002834 }
2835 COPY_BUF(l,buffer,len,c);
2836 cur += l;
2837 c = CUR_SCHAR(cur, l);
2838 }
2839 buffer[len] = 0;
2840 *str = cur;
2841 return(buffer);
2842 }
2843 }
2844 *str = cur;
2845 return(xmlStrndup(buf, len));
2846}
2847
2848/**
2849 * xmlParseNmtoken:
2850 * @ctxt: an XML parser context
2851 *
2852 * parse an XML Nmtoken.
2853 *
2854 * [7] Nmtoken ::= (NameChar)+
2855 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002856 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002857 *
2858 * Returns the Nmtoken parsed or NULL
2859 */
2860
2861xmlChar *
2862xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2863 xmlChar buf[XML_MAX_NAMELEN + 5];
2864 int len = 0, l;
2865 int c;
2866 int count = 0;
2867
2868 GROW;
2869 c = CUR_CHAR(l);
2870
William M. Brack871611b2003-10-18 04:53:14 +00002871 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002872 (c == '.') || (c == '-') ||
2873 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002874 (IS_COMBINING(c)) ||
2875 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002876 if (count++ > 100) {
2877 count = 0;
2878 GROW;
2879 }
2880 COPY_BUF(l,buf,len,c);
2881 NEXTL(l);
2882 c = CUR_CHAR(l);
2883 if (len >= XML_MAX_NAMELEN) {
2884 /*
2885 * Okay someone managed to make a huge token, so he's ready to pay
2886 * for the processing speed.
2887 */
2888 xmlChar *buffer;
2889 int max = len * 2;
2890
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002891 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002892 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002893 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002894 return(NULL);
2895 }
2896 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002897 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002898 (c == '.') || (c == '-') ||
2899 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002900 (IS_COMBINING(c)) ||
2901 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002902 if (count++ > 100) {
2903 count = 0;
2904 GROW;
2905 }
2906 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002907 xmlChar *tmp;
2908
Owen Taylor3473f882001-02-23 17:55:21 +00002909 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002910 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002911 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002913 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002914 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 return(NULL);
2916 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002917 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002918 }
2919 COPY_BUF(l,buffer,len,c);
2920 NEXTL(l);
2921 c = CUR_CHAR(l);
2922 }
2923 buffer[len] = 0;
2924 return(buffer);
2925 }
2926 }
2927 if (len == 0)
2928 return(NULL);
2929 return(xmlStrndup(buf, len));
2930}
2931
2932/**
2933 * xmlParseEntityValue:
2934 * @ctxt: an XML parser context
2935 * @orig: if non-NULL store a copy of the original entity value
2936 *
2937 * parse a value for ENTITY declarations
2938 *
2939 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2940 * "'" ([^%&'] | PEReference | Reference)* "'"
2941 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002942 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002943 */
2944
2945xmlChar *
2946xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2947 xmlChar *buf = NULL;
2948 int len = 0;
2949 int size = XML_PARSER_BUFFER_SIZE;
2950 int c, l;
2951 xmlChar stop;
2952 xmlChar *ret = NULL;
2953 const xmlChar *cur = NULL;
2954 xmlParserInputPtr input;
2955
2956 if (RAW == '"') stop = '"';
2957 else if (RAW == '\'') stop = '\'';
2958 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002959 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002960 return(NULL);
2961 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002962 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002963 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002964 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002965 return(NULL);
2966 }
2967
2968 /*
2969 * The content of the entity definition is copied in a buffer.
2970 */
2971
2972 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2973 input = ctxt->input;
2974 GROW;
2975 NEXT;
2976 c = CUR_CHAR(l);
2977 /*
2978 * NOTE: 4.4.5 Included in Literal
2979 * When a parameter entity reference appears in a literal entity
2980 * value, ... a single or double quote character in the replacement
2981 * text is always treated as a normal data character and will not
2982 * terminate the literal.
2983 * In practice it means we stop the loop only when back at parsing
2984 * the initial entity and the quote is found
2985 */
William M. Brack871611b2003-10-18 04:53:14 +00002986 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002987 (ctxt->input != input))) {
2988 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002989 xmlChar *tmp;
2990
Owen Taylor3473f882001-02-23 17:55:21 +00002991 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002992 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2993 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002994 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002995 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 return(NULL);
2997 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002998 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 COPY_BUF(l,buf,len,c);
3001 NEXTL(l);
3002 /*
3003 * Pop-up of finished entities.
3004 */
3005 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3006 xmlPopInput(ctxt);
3007
3008 GROW;
3009 c = CUR_CHAR(l);
3010 if (c == 0) {
3011 GROW;
3012 c = CUR_CHAR(l);
3013 }
3014 }
3015 buf[len] = 0;
3016
3017 /*
3018 * Raise problem w.r.t. '&' and '%' being used in non-entities
3019 * reference constructs. Note Charref will be handled in
3020 * xmlStringDecodeEntities()
3021 */
3022 cur = buf;
3023 while (*cur != 0) { /* non input consuming */
3024 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3025 xmlChar *name;
3026 xmlChar tmp = *cur;
3027
3028 cur++;
3029 name = xmlParseStringName(ctxt, &cur);
3030 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003031 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003032 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003033 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003034 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003035 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3036 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 if (name != NULL)
3040 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003041 if (*cur == 0)
3042 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003043 }
3044 cur++;
3045 }
3046
3047 /*
3048 * Then PEReference entities are substituted.
3049 */
3050 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003051 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003052 xmlFree(buf);
3053 } else {
3054 NEXT;
3055 /*
3056 * NOTE: 4.4.7 Bypassed
3057 * When a general entity reference appears in the EntityValue in
3058 * an entity declaration, it is bypassed and left as is.
3059 * so XML_SUBSTITUTE_REF is not set here.
3060 */
3061 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3062 0, 0, 0);
3063 if (orig != NULL)
3064 *orig = buf;
3065 else
3066 xmlFree(buf);
3067 }
3068
3069 return(ret);
3070}
3071
3072/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003073 * xmlParseAttValueComplex:
3074 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003075 * @attlen: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003076 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003077 *
3078 * parse a value for an attribute, this is the fallback function
3079 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003080 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003081 *
3082 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3083 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003084static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003085xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003086 xmlChar limit = 0;
3087 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003088 int len = 0;
3089 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003090 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003091 xmlChar *current = NULL;
3092 xmlEntityPtr ent;
3093
Owen Taylor3473f882001-02-23 17:55:21 +00003094 if (NXT(0) == '"') {
3095 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3096 limit = '"';
3097 NEXT;
3098 } else if (NXT(0) == '\'') {
3099 limit = '\'';
3100 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3101 NEXT;
3102 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003103 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003104 return(NULL);
3105 }
3106
3107 /*
3108 * allocate a translation buffer.
3109 */
3110 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003111 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003113
3114 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003115 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003116 */
3117 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003118 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003119 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003120 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003121 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003122 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003123 if (NXT(1) == '#') {
3124 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003125
Owen Taylor3473f882001-02-23 17:55:21 +00003126 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003127 if (ctxt->replaceEntities) {
3128 if (len > buf_size - 10) {
3129 growBuffer(buf);
3130 }
3131 buf[len++] = '&';
3132 } else {
3133 /*
3134 * The reparsing will be done in xmlStringGetNodeList()
3135 * called by the attribute() function in SAX.c
3136 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003140 buf[len++] = '&';
3141 buf[len++] = '#';
3142 buf[len++] = '3';
3143 buf[len++] = '8';
3144 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003147 if (len > buf_size - 10) {
3148 growBuffer(buf);
3149 }
Owen Taylor3473f882001-02-23 17:55:21 +00003150 len += xmlCopyChar(0, &buf[len], val);
3151 }
3152 } else {
3153 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003154 if ((ent != NULL) &&
3155 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3156 if (len > buf_size - 10) {
3157 growBuffer(buf);
3158 }
3159 if ((ctxt->replaceEntities == 0) &&
3160 (ent->content[0] == '&')) {
3161 buf[len++] = '&';
3162 buf[len++] = '#';
3163 buf[len++] = '3';
3164 buf[len++] = '8';
3165 buf[len++] = ';';
3166 } else {
3167 buf[len++] = ent->content[0];
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 } else if ((ent != NULL) &&
3170 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003171 xmlChar *rep;
3172
3173 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3174 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003175 XML_SUBSTITUTE_REF,
3176 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003177 if (rep != NULL) {
3178 current = rep;
3179 while (*current != 0) { /* non input consuming */
3180 buf[len++] = *current++;
3181 if (len > buf_size - 10) {
3182 growBuffer(buf);
3183 }
3184 }
3185 xmlFree(rep);
3186 }
3187 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003188 if (len > buf_size - 10) {
3189 growBuffer(buf);
3190 }
Owen Taylor3473f882001-02-23 17:55:21 +00003191 if (ent->content != NULL)
3192 buf[len++] = ent->content[0];
3193 }
3194 } else if (ent != NULL) {
3195 int i = xmlStrlen(ent->name);
3196 const xmlChar *cur = ent->name;
3197
3198 /*
3199 * This may look absurd but is needed to detect
3200 * entities problems
3201 */
3202 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3203 (ent->content != NULL)) {
3204 xmlChar *rep;
3205 rep = xmlStringDecodeEntities(ctxt, ent->content,
3206 XML_SUBSTITUTE_REF, 0, 0, 0);
3207 if (rep != NULL)
3208 xmlFree(rep);
3209 }
3210
3211 /*
3212 * Just output the reference
3213 */
3214 buf[len++] = '&';
3215 if (len > buf_size - i - 10) {
3216 growBuffer(buf);
3217 }
3218 for (;i > 0;i--)
3219 buf[len++] = *cur++;
3220 buf[len++] = ';';
3221 }
3222 }
3223 } else {
3224 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 if ((len != 0) || (!normalize)) {
3226 if ((!normalize) || (!in_space)) {
3227 COPY_BUF(l,buf,len,0x20);
3228 if (len > buf_size - 10) {
3229 growBuffer(buf);
3230 }
3231 }
3232 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003233 }
3234 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003235 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003236 COPY_BUF(l,buf,len,c);
3237 if (len > buf_size - 10) {
3238 growBuffer(buf);
3239 }
3240 }
3241 NEXTL(l);
3242 }
3243 GROW;
3244 c = CUR_CHAR(l);
3245 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003246 if ((in_space) && (normalize)) {
3247 while (buf[len - 1] == 0x20) len--;
3248 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003249 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003250 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003251 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003252 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003253 if ((c != 0) && (!IS_CHAR(c))) {
3254 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3255 "invalid character in attribute value\n");
3256 } else {
3257 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3258 "AttValue: ' expected\n");
3259 }
Owen Taylor3473f882001-02-23 17:55:21 +00003260 } else
3261 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003262 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003263 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003264
3265mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003266 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003267 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003268}
3269
3270/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003271 * xmlParseAttValue:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse a value for an attribute
3275 * Note: the parser won't do substitution of entities here, this
3276 * will be handled later in xmlStringGetNodeList
3277 *
3278 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3279 * "'" ([^<&'] | Reference)* "'"
3280 *
3281 * 3.3.3 Attribute-Value Normalization:
3282 * Before the value of an attribute is passed to the application or
3283 * checked for validity, the XML processor must normalize it as follows:
3284 * - a character reference is processed by appending the referenced
3285 * character to the attribute value
3286 * - an entity reference is processed by recursively processing the
3287 * replacement text of the entity
3288 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3289 * appending #x20 to the normalized value, except that only a single
3290 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3291 * parsed entity or the literal entity value of an internal parsed entity
3292 * - other characters are processed by appending them to the normalized value
3293 * If the declared value is not CDATA, then the XML processor must further
3294 * process the normalized attribute value by discarding any leading and
3295 * trailing space (#x20) characters, and by replacing sequences of space
3296 * (#x20) characters by a single space (#x20) character.
3297 * All attributes for which no declaration has been read should be treated
3298 * by a non-validating parser as if declared CDATA.
3299 *
3300 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3301 */
3302
3303
3304xmlChar *
3305xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003306 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003307 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003308}
3309
3310/**
Owen Taylor3473f882001-02-23 17:55:21 +00003311 * xmlParseSystemLiteral:
3312 * @ctxt: an XML parser context
3313 *
3314 * parse an XML Literal
3315 *
3316 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3317 *
3318 * Returns the SystemLiteral parsed or NULL
3319 */
3320
3321xmlChar *
3322xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3323 xmlChar *buf = NULL;
3324 int len = 0;
3325 int size = XML_PARSER_BUFFER_SIZE;
3326 int cur, l;
3327 xmlChar stop;
3328 int state = ctxt->instate;
3329 int count = 0;
3330
3331 SHRINK;
3332 if (RAW == '"') {
3333 NEXT;
3334 stop = '"';
3335 } else if (RAW == '\'') {
3336 NEXT;
3337 stop = '\'';
3338 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003339 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003340 return(NULL);
3341 }
3342
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003343 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003344 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003345 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003346 return(NULL);
3347 }
3348 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3349 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003350 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003351 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003352 xmlChar *tmp;
3353
Owen Taylor3473f882001-02-23 17:55:21 +00003354 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003355 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3356 if (tmp == NULL) {
3357 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003358 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003359 ctxt->instate = (xmlParserInputState) state;
3360 return(NULL);
3361 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003362 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003363 }
3364 count++;
3365 if (count > 50) {
3366 GROW;
3367 count = 0;
3368 }
3369 COPY_BUF(l,buf,len,cur);
3370 NEXTL(l);
3371 cur = CUR_CHAR(l);
3372 if (cur == 0) {
3373 GROW;
3374 SHRINK;
3375 cur = CUR_CHAR(l);
3376 }
3377 }
3378 buf[len] = 0;
3379 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003380 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003381 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003382 } else {
3383 NEXT;
3384 }
3385 return(buf);
3386}
3387
3388/**
3389 * xmlParsePubidLiteral:
3390 * @ctxt: an XML parser context
3391 *
3392 * parse an XML public literal
3393 *
3394 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3395 *
3396 * Returns the PubidLiteral parsed or NULL.
3397 */
3398
3399xmlChar *
3400xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3401 xmlChar *buf = NULL;
3402 int len = 0;
3403 int size = XML_PARSER_BUFFER_SIZE;
3404 xmlChar cur;
3405 xmlChar stop;
3406 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003407 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003408
3409 SHRINK;
3410 if (RAW == '"') {
3411 NEXT;
3412 stop = '"';
3413 } else if (RAW == '\'') {
3414 NEXT;
3415 stop = '\'';
3416 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003417 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003418 return(NULL);
3419 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003420 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003421 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003422 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003423 return(NULL);
3424 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003425 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003426 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003427 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003429 xmlChar *tmp;
3430
Owen Taylor3473f882001-02-23 17:55:21 +00003431 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003432 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3433 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003434 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003435 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003436 return(NULL);
3437 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003439 }
3440 buf[len++] = cur;
3441 count++;
3442 if (count > 50) {
3443 GROW;
3444 count = 0;
3445 }
3446 NEXT;
3447 cur = CUR;
3448 if (cur == 0) {
3449 GROW;
3450 SHRINK;
3451 cur = CUR;
3452 }
3453 }
3454 buf[len] = 0;
3455 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003457 } else {
3458 NEXT;
3459 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003460 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003461 return(buf);
3462}
3463
Daniel Veillard48b2f892001-02-25 16:11:03 +00003464void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003465
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. Indexed by a byte value: the entry is the byte itself (non
 * zero) for 0x09 and for 0x20..0x7F except '&', '<' and ']'; it is 0 for
 * every other byte, including 0x00, LF (0x0A), CR (0x0D) and all
 * non-ASCII bytes, which the scanning loop must stop on.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80-0xFF: non-ascii, always rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3503
Owen Taylor3473f882001-02-23 17:55:21 +00003504/**
3505 * xmlParseCharData:
3506 * @ctxt: an XML parser context
3507 * @cdata: int indicating whether we are within a CDATA section
3508 *
3509 * parse a CharData section.
3510 * if we are within a CDATA section ']]>' marks an end of section.
3511 *
3512 * The right angle bracket (>) may be represented using the string "&gt;",
3513 * and must, for compatibility, be escaped using "&gt;" or a character
3514 * reference when it appears in the string "]]>" in content, when that
3515 * string is not marking the end of a CDATA section.
3516 *
3517 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3518 */
3519
3520void
3521xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003522 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003523 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003524 int line = ctxt->input->line;
3525 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003526 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003527
3528 SHRINK;
3529 GROW;
3530 /*
3531 * Accelerated common case where input don't need to be
3532 * modified before passing it to the handler.
3533 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003534 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003535 in = ctxt->input->cur;
3536 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003537get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003538 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003539 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003540 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003541 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003542 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003543 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003544 goto get_more_space;
3545 }
3546 if (*in == '<') {
3547 nbchar = in - ctxt->input->cur;
3548 if (nbchar > 0) {
3549 const xmlChar *tmp = ctxt->input->cur;
3550 ctxt->input->cur = in;
3551
Daniel Veillard34099b42004-11-04 17:34:35 +00003552 if ((ctxt->sax != NULL) &&
3553 (ctxt->sax->ignorableWhitespace !=
3554 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003555 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003556 if (ctxt->sax->ignorableWhitespace != NULL)
3557 ctxt->sax->ignorableWhitespace(ctxt->userData,
3558 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003559 } else {
3560 if (ctxt->sax->characters != NULL)
3561 ctxt->sax->characters(ctxt->userData,
3562 tmp, nbchar);
3563 if (*ctxt->space == -1)
3564 *ctxt->space = -2;
3565 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003566 } else if ((ctxt->sax != NULL) &&
3567 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003568 ctxt->sax->characters(ctxt->userData,
3569 tmp, nbchar);
3570 }
3571 }
3572 return;
3573 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003574
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003575get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003576 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003577 while (test_char_data[*in]) {
3578 in++;
3579 ccol++;
3580 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003581 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003582 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003583 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003584 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003585 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003586 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003587 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003588 }
3589 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003590 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003592 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003593 return;
3594 }
3595 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003596 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003597 goto get_more;
3598 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003599 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003600 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003601 if ((ctxt->sax != NULL) &&
3602 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003603 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003604 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003605 const xmlChar *tmp = ctxt->input->cur;
3606 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003607
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003608 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003609 if (ctxt->sax->ignorableWhitespace != NULL)
3610 ctxt->sax->ignorableWhitespace(ctxt->userData,
3611 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003612 } else {
3613 if (ctxt->sax->characters != NULL)
3614 ctxt->sax->characters(ctxt->userData,
3615 tmp, nbchar);
3616 if (*ctxt->space == -1)
3617 *ctxt->space = -2;
3618 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003619 line = ctxt->input->line;
3620 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003621 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003622 if (ctxt->sax->characters != NULL)
3623 ctxt->sax->characters(ctxt->userData,
3624 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003625 line = ctxt->input->line;
3626 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003627 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003628 }
3629 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003630 if (*in == 0xD) {
3631 in++;
3632 if (*in == 0xA) {
3633 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003634 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003635 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003636 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003637 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003638 in--;
3639 }
3640 if (*in == '<') {
3641 return;
3642 }
3643 if (*in == '&') {
3644 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003645 }
3646 SHRINK;
3647 GROW;
3648 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003649 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003650 nbchar = 0;
3651 }
Daniel Veillard50582112001-03-26 22:52:16 +00003652 ctxt->input->line = line;
3653 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003654 xmlParseCharDataComplex(ctxt, cdata);
3655}
3656
Daniel Veillard01c13b52002-12-10 15:19:08 +00003657/**
3658 * xmlParseCharDataComplex:
3659 * @ctxt: an XML parser context
3660 * @cdata: int indicating whether we are within a CDATA section
3661 *
3662 * parse a CharData section.this is the fallback function
3663 * of xmlParseCharData() when the parsing requires handling
3664 * of non-ASCII characters.
3665 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003666void
3667xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003668 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3669 int nbchar = 0;
3670 int cur, l;
3671 int count = 0;
3672
3673 SHRINK;
3674 GROW;
3675 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003676 while ((cur != '<') && /* checked */
3677 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003678 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003679 if ((cur == ']') && (NXT(1) == ']') &&
3680 (NXT(2) == '>')) {
3681 if (cdata) break;
3682 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003683 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003684 }
3685 }
3686 COPY_BUF(l,buf,nbchar,cur);
3687 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003688 buf[nbchar] = 0;
3689
Owen Taylor3473f882001-02-23 17:55:21 +00003690 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003691 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003692 */
3693 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003694 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003695 if (ctxt->sax->ignorableWhitespace != NULL)
3696 ctxt->sax->ignorableWhitespace(ctxt->userData,
3697 buf, nbchar);
3698 } else {
3699 if (ctxt->sax->characters != NULL)
3700 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003701 if ((ctxt->sax->characters !=
3702 ctxt->sax->ignorableWhitespace) &&
3703 (*ctxt->space == -1))
3704 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003705 }
3706 }
3707 nbchar = 0;
3708 }
3709 count++;
3710 if (count > 50) {
3711 GROW;
3712 count = 0;
3713 }
3714 NEXTL(l);
3715 cur = CUR_CHAR(l);
3716 }
3717 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003718 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003719 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003720 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003721 */
3722 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003723 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003724 if (ctxt->sax->ignorableWhitespace != NULL)
3725 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3726 } else {
3727 if (ctxt->sax->characters != NULL)
3728 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003729 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3730 (*ctxt->space == -1))
3731 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003732 }
3733 }
3734 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003735 if ((cur != 0) && (!IS_CHAR(cur))) {
3736 /* Generate the error and skip the offending character */
3737 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3738 "PCDATA invalid Char value %d\n",
3739 cur);
3740 NEXTL(l);
3741 }
Owen Taylor3473f882001-02-23 17:55:21 +00003742}
3743
3744/**
3745 * xmlParseExternalID:
3746 * @ctxt: an XML parser context
3747 * @publicID: a xmlChar** receiving PubidLiteral
3748 * @strict: indicate whether we should restrict parsing to only
3749 * production [75], see NOTE below
3750 *
3751 * Parse an External ID or a Public ID
3752 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003753 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003754 * 'PUBLIC' S PubidLiteral S SystemLiteral
3755 *
3756 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3757 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3758 *
3759 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3760 *
3761 * Returns the function returns SystemLiteral and in the second
3762 * case publicID receives PubidLiteral, is strict is off
3763 * it is possible to return NULL and have publicID set.
3764 */
3765
3766xmlChar *
3767xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3768 xmlChar *URI = NULL;
3769
3770 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003771
3772 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003773 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003774 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003775 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3777 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003778 }
3779 SKIP_BLANKS;
3780 URI = xmlParseSystemLiteral(ctxt);
3781 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003784 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003785 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003786 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003788 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 SKIP_BLANKS;
3791 *publicID = xmlParsePubidLiteral(ctxt);
3792 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003793 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 }
3795 if (strict) {
3796 /*
3797 * We don't handle [83] so "S SystemLiteral" is required.
3798 */
William M. Brack76e95df2003-10-18 16:20:14 +00003799 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003800 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003801 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003802 }
3803 } else {
3804 /*
3805 * We handle [83] so we return immediately, if
3806 * "S SystemLiteral" is not detected. From a purely parsing
3807 * point of view that's a nice mess.
3808 */
3809 const xmlChar *ptr;
3810 GROW;
3811
3812 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003813 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003814
William M. Brack76e95df2003-10-18 16:20:14 +00003815 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003816 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3817 }
3818 SKIP_BLANKS;
3819 URI = xmlParseSystemLiteral(ctxt);
3820 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003821 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 }
3824 return(URI);
3825}
3826
3827/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003828 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003829 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003830 * @buf: the already parsed part of the buffer
3831 * @len: number of bytes filles in the buffer
3832 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003833 *
3834 * Skip an XML (SGML) comment <!-- .... -->
3835 * The spec says that "For compatibility, the string "--" (double-hyphen)
3836 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003837 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003838 *
3839 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3840 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003841static void
3842xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003843 int q, ql;
3844 int r, rl;
3845 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003846 xmlParserInputPtr input = ctxt->input;
3847 int count = 0;
3848
Owen Taylor3473f882001-02-23 17:55:21 +00003849 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003850 len = 0;
3851 size = XML_PARSER_BUFFER_SIZE;
3852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3853 if (buf == NULL) {
3854 xmlErrMemory(ctxt, NULL);
3855 return;
3856 }
Owen Taylor3473f882001-02-23 17:55:21 +00003857 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003858 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003859 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003860 if (q == 0)
3861 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003862 if (!IS_CHAR(q)) {
3863 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3864 "xmlParseComment: invalid xmlChar value %d\n",
3865 q);
3866 xmlFree (buf);
3867 return;
3868 }
Owen Taylor3473f882001-02-23 17:55:21 +00003869 NEXTL(ql);
3870 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003871 if (r == 0)
3872 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003873 if (!IS_CHAR(r)) {
3874 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3875 "xmlParseComment: invalid xmlChar value %d\n",
3876 q);
3877 xmlFree (buf);
3878 return;
3879 }
Owen Taylor3473f882001-02-23 17:55:21 +00003880 NEXTL(rl);
3881 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003882 if (cur == 0)
3883 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003884 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 ((cur != '>') ||
3886 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003887 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003888 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003889 }
3890 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003891 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003892 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003893 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3894 if (new_buf == NULL) {
3895 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003896 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003897 return;
3898 }
William M. Bracka3215c72004-07-31 16:24:01 +00003899 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 }
3901 COPY_BUF(ql,buf,len,q);
3902 q = r;
3903 ql = rl;
3904 r = cur;
3905 rl = l;
3906
3907 count++;
3908 if (count > 50) {
3909 GROW;
3910 count = 0;
3911 }
3912 NEXTL(l);
3913 cur = CUR_CHAR(l);
3914 if (cur == 0) {
3915 SHRINK;
3916 GROW;
3917 cur = CUR_CHAR(l);
3918 }
3919 }
3920 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00003921 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003922 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003923 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00003924 } else if (!IS_CHAR(cur)) {
3925 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3926 "xmlParseComment: invalid xmlChar value %d\n",
3927 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00003928 } else {
3929 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003930 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3931 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003932 }
3933 NEXT;
3934 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3935 (!ctxt->disableSAX))
3936 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003937 }
Daniel Veillardda629342007-08-01 07:49:06 +00003938 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003939 return;
3940not_terminated:
3941 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3942 "Comment not terminated\n", NULL);
3943 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00003944 return;
Owen Taylor3473f882001-02-23 17:55:21 +00003945}
Daniel Veillardda629342007-08-01 07:49:06 +00003946
Daniel Veillard4c778d82005-01-23 17:37:44 +00003947/**
3948 * xmlParseComment:
3949 * @ctxt: an XML parser context
3950 *
3951 * Skip an XML (SGML) comment <!-- .... -->
3952 * The spec says that "For compatibility, the string "--" (double-hyphen)
3953 * must not occur within comments. "
3954 *
3955 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3956 */
3957void
3958xmlParseComment(xmlParserCtxtPtr ctxt) {
3959 xmlChar *buf = NULL;
3960 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003961 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003962 xmlParserInputState state;
3963 const xmlChar *in;
3964 int nbchar = 0, ccol;
3965
3966 /*
3967 * Check that there is a comment right here.
3968 */
3969 if ((RAW != '<') || (NXT(1) != '!') ||
3970 (NXT(2) != '-') || (NXT(3) != '-')) return;
3971
3972 state = ctxt->instate;
3973 ctxt->instate = XML_PARSER_COMMENT;
3974 SKIP(4);
3975 SHRINK;
3976 GROW;
3977
3978 /*
3979 * Accelerated common case where input don't need to be
3980 * modified before passing it to the handler.
3981 */
3982 in = ctxt->input->cur;
3983 do {
3984 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003985 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003986 ctxt->input->line++; ctxt->input->col = 1;
3987 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003988 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003989 }
3990get_more:
3991 ccol = ctxt->input->col;
3992 while (((*in > '-') && (*in <= 0x7F)) ||
3993 ((*in >= 0x20) && (*in < '-')) ||
3994 (*in == 0x09)) {
3995 in++;
3996 ccol++;
3997 }
3998 ctxt->input->col = ccol;
3999 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004000 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004001 ctxt->input->line++; ctxt->input->col = 1;
4002 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004003 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004004 goto get_more;
4005 }
4006 nbchar = in - ctxt->input->cur;
4007 /*
4008 * save current set of data
4009 */
4010 if (nbchar > 0) {
4011 if ((ctxt->sax != NULL) &&
4012 (ctxt->sax->comment != NULL)) {
4013 if (buf == NULL) {
4014 if ((*in == '-') && (in[1] == '-'))
4015 size = nbchar + 1;
4016 else
4017 size = XML_PARSER_BUFFER_SIZE + nbchar;
4018 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4019 if (buf == NULL) {
4020 xmlErrMemory(ctxt, NULL);
4021 ctxt->instate = state;
4022 return;
4023 }
4024 len = 0;
4025 } else if (len + nbchar + 1 >= size) {
4026 xmlChar *new_buf;
4027 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4028 new_buf = (xmlChar *) xmlRealloc(buf,
4029 size * sizeof(xmlChar));
4030 if (new_buf == NULL) {
4031 xmlFree (buf);
4032 xmlErrMemory(ctxt, NULL);
4033 ctxt->instate = state;
4034 return;
4035 }
4036 buf = new_buf;
4037 }
4038 memcpy(&buf[len], ctxt->input->cur, nbchar);
4039 len += nbchar;
4040 buf[len] = 0;
4041 }
4042 }
4043 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004044 if (*in == 0xA) {
4045 in++;
4046 ctxt->input->line++; ctxt->input->col = 1;
4047 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004048 if (*in == 0xD) {
4049 in++;
4050 if (*in == 0xA) {
4051 ctxt->input->cur = in;
4052 in++;
4053 ctxt->input->line++; ctxt->input->col = 1;
4054 continue; /* while */
4055 }
4056 in--;
4057 }
4058 SHRINK;
4059 GROW;
4060 in = ctxt->input->cur;
4061 if (*in == '-') {
4062 if (in[1] == '-') {
4063 if (in[2] == '>') {
4064 SKIP(3);
4065 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4066 (!ctxt->disableSAX)) {
4067 if (buf != NULL)
4068 ctxt->sax->comment(ctxt->userData, buf);
4069 else
4070 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4071 }
4072 if (buf != NULL)
4073 xmlFree(buf);
4074 ctxt->instate = state;
4075 return;
4076 }
4077 if (buf != NULL)
4078 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4079 "Comment not terminated \n<!--%.50s\n",
4080 buf);
4081 else
4082 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4083 "Comment not terminated \n", NULL);
4084 in++;
4085 ctxt->input->col++;
4086 }
4087 in++;
4088 ctxt->input->col++;
4089 goto get_more;
4090 }
4091 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4092 xmlParseCommentComplex(ctxt, buf, len, size);
4093 ctxt->instate = state;
4094 return;
4095}
4096
Owen Taylor3473f882001-02-23 17:55:21 +00004097
4098/**
4099 * xmlParsePITarget:
4100 * @ctxt: an XML parser context
4101 *
4102 * parse the name of a PI
4103 *
4104 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4105 *
4106 * Returns the PITarget name or NULL
4107 */
4108
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004109const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004110xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004111 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004112
4113 name = xmlParseName(ctxt);
4114 if ((name != NULL) &&
4115 ((name[0] == 'x') || (name[0] == 'X')) &&
4116 ((name[1] == 'm') || (name[1] == 'M')) &&
4117 ((name[2] == 'l') || (name[2] == 'L'))) {
4118 int i;
4119 if ((name[0] == 'x') && (name[1] == 'm') &&
4120 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004121 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004122 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004123 return(name);
4124 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004126 return(name);
4127 }
4128 for (i = 0;;i++) {
4129 if (xmlW3CPIs[i] == NULL) break;
4130 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4131 return(name);
4132 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004133 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4134 "xmlParsePITarget: invalid name prefix 'xml'\n",
4135 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004136 }
4137 return(name);
4138}
4139
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004140#ifdef LIBXML_CATALOG_ENABLED
4141/**
4142 * xmlParseCatalogPI:
4143 * @ctxt: an XML parser context
4144 * @catalog: the PI value string
4145 *
4146 * parse an XML Catalog Processing Instruction.
4147 *
4148 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4149 *
4150 * Occurs only if allowed by the user and if happening in the Misc
4151 * part of the document before any doctype informations
4152 * This will add the given catalog to the parsing context in order
4153 * to be used if there is a resolution need further down in the document
4154 */
4155
4156static void
4157xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4158 xmlChar *URL = NULL;
4159 const xmlChar *tmp, *base;
4160 xmlChar marker;
4161
4162 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004163 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004164 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4165 goto error;
4166 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004167 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004168 if (*tmp != '=') {
4169 return;
4170 }
4171 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004172 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004173 marker = *tmp;
4174 if ((marker != '\'') && (marker != '"'))
4175 goto error;
4176 tmp++;
4177 base = tmp;
4178 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4179 if (*tmp == 0)
4180 goto error;
4181 URL = xmlStrndup(base, tmp - base);
4182 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004183 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004184 if (*tmp != 0)
4185 goto error;
4186
4187 if (URL != NULL) {
4188 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4189 xmlFree(URL);
4190 }
4191 return;
4192
4193error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004194 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4195 "Catalog PI syntax error: %s\n",
4196 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004197 if (URL != NULL)
4198 xmlFree(URL);
4199}
4200#endif
4201
Owen Taylor3473f882001-02-23 17:55:21 +00004202/**
4203 * xmlParsePI:
4204 * @ctxt: an XML parser context
4205 *
4206 * parse an XML Processing Instruction.
4207 *
4208 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4209 *
4210 * The processing is transfered to SAX once parsed.
4211 */
4212
4213void
4214xmlParsePI(xmlParserCtxtPtr ctxt) {
4215 xmlChar *buf = NULL;
4216 int len = 0;
4217 int size = XML_PARSER_BUFFER_SIZE;
4218 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004219 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 xmlParserInputState state;
4221 int count = 0;
4222
4223 if ((RAW == '<') && (NXT(1) == '?')) {
4224 xmlParserInputPtr input = ctxt->input;
4225 state = ctxt->instate;
4226 ctxt->instate = XML_PARSER_PI;
4227 /*
4228 * this is a Processing Instruction.
4229 */
4230 SKIP(2);
4231 SHRINK;
4232
4233 /*
4234 * Parse the target name and check for special support like
4235 * namespace.
4236 */
4237 target = xmlParsePITarget(ctxt);
4238 if (target != NULL) {
4239 if ((RAW == '?') && (NXT(1) == '>')) {
4240 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004241 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4242 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 SKIP(2);
4245
4246 /*
4247 * SAX: PI detected.
4248 */
4249 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4250 (ctxt->sax->processingInstruction != NULL))
4251 ctxt->sax->processingInstruction(ctxt->userData,
4252 target, NULL);
4253 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004254 return;
4255 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004256 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004257 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004258 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 ctxt->instate = state;
4260 return;
4261 }
4262 cur = CUR;
4263 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004264 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4265 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 SKIP_BLANKS;
4268 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004269 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004270 ((cur != '?') || (NXT(1) != '>'))) {
4271 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004272 xmlChar *tmp;
4273
Owen Taylor3473f882001-02-23 17:55:21 +00004274 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004275 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4276 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004277 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004278 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004279 ctxt->instate = state;
4280 return;
4281 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004282 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004283 }
4284 count++;
4285 if (count > 50) {
4286 GROW;
4287 count = 0;
4288 }
4289 COPY_BUF(l,buf,len,cur);
4290 NEXTL(l);
4291 cur = CUR_CHAR(l);
4292 if (cur == 0) {
4293 SHRINK;
4294 GROW;
4295 cur = CUR_CHAR(l);
4296 }
4297 }
4298 buf[len] = 0;
4299 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004300 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4301 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004302 } else {
4303 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4305 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307 SKIP(2);
4308
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004309#ifdef LIBXML_CATALOG_ENABLED
4310 if (((state == XML_PARSER_MISC) ||
4311 (state == XML_PARSER_START)) &&
4312 (xmlStrEqual(target, XML_CATALOG_PI))) {
4313 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4314 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4315 (allow == XML_CATA_ALLOW_ALL))
4316 xmlParseCatalogPI(ctxt, buf);
4317 }
4318#endif
4319
4320
Owen Taylor3473f882001-02-23 17:55:21 +00004321 /*
4322 * SAX: PI detected.
4323 */
4324 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4325 (ctxt->sax->processingInstruction != NULL))
4326 ctxt->sax->processingInstruction(ctxt->userData,
4327 target, buf);
4328 }
4329 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004330 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 ctxt->instate = state;
4334 }
4335}
4336
4337/**
4338 * xmlParseNotationDecl:
4339 * @ctxt: an XML parser context
4340 *
4341 * parse a notation declaration
4342 *
4343 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4344 *
4345 * Hence there is actually 3 choices:
4346 * 'PUBLIC' S PubidLiteral
4347 * 'PUBLIC' S PubidLiteral S SystemLiteral
4348 * and 'SYSTEM' S SystemLiteral
4349 *
4350 * See the NOTE on xmlParseExternalID().
4351 */
4352
4353void
4354xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004355 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 xmlChar *Pubid;
4357 xmlChar *Systemid;
4358
Daniel Veillarda07050d2003-10-19 14:46:32 +00004359 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004360 xmlParserInputPtr input = ctxt->input;
4361 SHRINK;
4362 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004363 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4365 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004366 return;
4367 }
4368 SKIP_BLANKS;
4369
Daniel Veillard76d66f42001-05-16 21:05:17 +00004370 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004371 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004372 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004373 return;
4374 }
William M. Brack76e95df2003-10-18 16:20:14 +00004375 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004377 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004378 return;
4379 }
4380 SKIP_BLANKS;
4381
4382 /*
4383 * Parse the IDs.
4384 */
4385 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4386 SKIP_BLANKS;
4387
4388 if (RAW == '>') {
4389 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004390 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4391 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004392 }
4393 NEXT;
4394 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4395 (ctxt->sax->notationDecl != NULL))
4396 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004398 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
Owen Taylor3473f882001-02-23 17:55:21 +00004400 if (Systemid != NULL) xmlFree(Systemid);
4401 if (Pubid != NULL) xmlFree(Pubid);
4402 }
4403}
4404
4405/**
4406 * xmlParseEntityDecl:
4407 * @ctxt: an XML parser context
4408 *
4409 * parse <!ENTITY declarations
4410 *
4411 * [70] EntityDecl ::= GEDecl | PEDecl
4412 *
4413 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4414 *
4415 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4416 *
4417 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4418 *
4419 * [74] PEDef ::= EntityValue | ExternalID
4420 *
4421 * [76] NDataDecl ::= S 'NDATA' S Name
4422 *
4423 * [ VC: Notation Declared ]
4424 * The Name must match the declared name of a notation.
4425 */
4426
4427void
4428xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004429 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004430 xmlChar *value = NULL;
4431 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004432 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004433 int isParameter = 0;
4434 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004435 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004436
Daniel Veillard4c778d82005-01-23 17:37:44 +00004437 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004438 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004439 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004440 SHRINK;
4441 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004442 skipped = SKIP_BLANKS;
4443 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004444 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4445 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004446 }
Owen Taylor3473f882001-02-23 17:55:21 +00004447
4448 if (RAW == '%') {
4449 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004450 skipped = SKIP_BLANKS;
4451 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4453 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004454 }
Owen Taylor3473f882001-02-23 17:55:21 +00004455 isParameter = 1;
4456 }
4457
Daniel Veillard76d66f42001-05-16 21:05:17 +00004458 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004459 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4461 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004462 return;
4463 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004464 skipped = SKIP_BLANKS;
4465 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004466 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4467 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004468 }
Owen Taylor3473f882001-02-23 17:55:21 +00004469
Daniel Veillardf5582f12002-06-11 10:08:16 +00004470 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004471 /*
4472 * handle the various case of definitions...
4473 */
4474 if (isParameter) {
4475 if ((RAW == '"') || (RAW == '\'')) {
4476 value = xmlParseEntityValue(ctxt, &orig);
4477 if (value) {
4478 if ((ctxt->sax != NULL) &&
4479 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4480 ctxt->sax->entityDecl(ctxt->userData, name,
4481 XML_INTERNAL_PARAMETER_ENTITY,
4482 NULL, NULL, value);
4483 }
4484 } else {
4485 URI = xmlParseExternalID(ctxt, &literal, 1);
4486 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004487 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004488 }
4489 if (URI) {
4490 xmlURIPtr uri;
4491
4492 uri = xmlParseURI((const char *) URI);
4493 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004494 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4495 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004496 /*
4497 * This really ought to be a well formedness error
4498 * but the XML Core WG decided otherwise c.f. issue
4499 * E26 of the XML erratas.
4500 */
Owen Taylor3473f882001-02-23 17:55:21 +00004501 } else {
4502 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004503 /*
4504 * Okay this is foolish to block those but not
4505 * invalid URIs.
4506 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004507 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004508 } else {
4509 if ((ctxt->sax != NULL) &&
4510 (!ctxt->disableSAX) &&
4511 (ctxt->sax->entityDecl != NULL))
4512 ctxt->sax->entityDecl(ctxt->userData, name,
4513 XML_EXTERNAL_PARAMETER_ENTITY,
4514 literal, URI, NULL);
4515 }
4516 xmlFreeURI(uri);
4517 }
4518 }
4519 }
4520 } else {
4521 if ((RAW == '"') || (RAW == '\'')) {
4522 value = xmlParseEntityValue(ctxt, &orig);
4523 if ((ctxt->sax != NULL) &&
4524 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4525 ctxt->sax->entityDecl(ctxt->userData, name,
4526 XML_INTERNAL_GENERAL_ENTITY,
4527 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004528 /*
4529 * For expat compatibility in SAX mode.
4530 */
4531 if ((ctxt->myDoc == NULL) ||
4532 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4533 if (ctxt->myDoc == NULL) {
4534 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4535 }
4536 if (ctxt->myDoc->intSubset == NULL)
4537 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4538 BAD_CAST "fake", NULL, NULL);
4539
Daniel Veillard1af9a412003-08-20 22:54:39 +00004540 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4541 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004542 }
Owen Taylor3473f882001-02-23 17:55:21 +00004543 } else {
4544 URI = xmlParseExternalID(ctxt, &literal, 1);
4545 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004546 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004547 }
4548 if (URI) {
4549 xmlURIPtr uri;
4550
4551 uri = xmlParseURI((const char *)URI);
4552 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004553 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4554 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004555 /*
4556 * This really ought to be a well formedness error
4557 * but the XML Core WG decided otherwise c.f. issue
4558 * E26 of the XML erratas.
4559 */
Owen Taylor3473f882001-02-23 17:55:21 +00004560 } else {
4561 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004562 /*
4563 * Okay this is foolish to block those but not
4564 * invalid URIs.
4565 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004566 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004567 }
4568 xmlFreeURI(uri);
4569 }
4570 }
William M. Brack76e95df2003-10-18 16:20:14 +00004571 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004572 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4573 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004574 }
4575 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004576 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004577 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004578 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004579 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4580 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004581 }
4582 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004583 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004584 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4585 (ctxt->sax->unparsedEntityDecl != NULL))
4586 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4587 literal, URI, ndata);
4588 } else {
4589 if ((ctxt->sax != NULL) &&
4590 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4591 ctxt->sax->entityDecl(ctxt->userData, name,
4592 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4593 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004594 /*
4595 * For expat compatibility in SAX mode.
4596 * assuming the entity repalcement was asked for
4597 */
4598 if ((ctxt->replaceEntities != 0) &&
4599 ((ctxt->myDoc == NULL) ||
4600 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4601 if (ctxt->myDoc == NULL) {
4602 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4603 }
4604
4605 if (ctxt->myDoc->intSubset == NULL)
4606 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4607 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004608 xmlSAX2EntityDecl(ctxt, name,
4609 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4610 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004611 }
Owen Taylor3473f882001-02-23 17:55:21 +00004612 }
4613 }
4614 }
4615 SKIP_BLANKS;
4616 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004617 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004618 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004619 } else {
4620 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004621 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4622 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004623 }
4624 NEXT;
4625 }
4626 if (orig != NULL) {
4627 /*
4628 * Ugly mechanism to save the raw entity value.
4629 */
4630 xmlEntityPtr cur = NULL;
4631
4632 if (isParameter) {
4633 if ((ctxt->sax != NULL) &&
4634 (ctxt->sax->getParameterEntity != NULL))
4635 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4636 } else {
4637 if ((ctxt->sax != NULL) &&
4638 (ctxt->sax->getEntity != NULL))
4639 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004640 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004641 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004642 }
Owen Taylor3473f882001-02-23 17:55:21 +00004643 }
4644 if (cur != NULL) {
4645 if (cur->orig != NULL)
4646 xmlFree(orig);
4647 else
4648 cur->orig = orig;
4649 } else
4650 xmlFree(orig);
4651 }
Owen Taylor3473f882001-02-23 17:55:21 +00004652 if (value != NULL) xmlFree(value);
4653 if (URI != NULL) xmlFree(URI);
4654 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004655 }
4656}
4657
4658/**
4659 * xmlParseDefaultDecl:
4660 * @ctxt: an XML parser context
4661 * @value: Receive a possible fixed default value for the attribute
4662 *
4663 * Parse an attribute default declaration
4664 *
4665 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4666 *
4667 * [ VC: Required Attribute ]
4668 * if the default declaration is the keyword #REQUIRED, then the
4669 * attribute must be specified for all elements of the type in the
4670 * attribute-list declaration.
4671 *
4672 * [ VC: Attribute Default Legal ]
4673 * The declared default value must meet the lexical constraints of
4674 * the declared attribute type c.f. xmlValidateAttributeDecl()
4675 *
4676 * [ VC: Fixed Attribute Default ]
4677 * if an attribute has a default value declared with the #FIXED
4678 * keyword, instances of that attribute must match the default value.
4679 *
4680 * [ WFC: No < in Attribute Values ]
4681 * handled in xmlParseAttValue()
4682 *
4683 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4684 * or XML_ATTRIBUTE_FIXED.
4685 */
4686
4687int
4688xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4689 int val;
4690 xmlChar *ret;
4691
4692 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004693 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004694 SKIP(9);
4695 return(XML_ATTRIBUTE_REQUIRED);
4696 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004697 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004698 SKIP(8);
4699 return(XML_ATTRIBUTE_IMPLIED);
4700 }
4701 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004702 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004703 SKIP(6);
4704 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004705 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4707 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004708 }
4709 SKIP_BLANKS;
4710 }
4711 ret = xmlParseAttValue(ctxt);
4712 ctxt->instate = XML_PARSER_DTD;
4713 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004714 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004715 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004716 } else
4717 *value = ret;
4718 return(val);
4719}
4720
4721/**
4722 * xmlParseNotationType:
4723 * @ctxt: an XML parser context
4724 *
4725 * parse an Notation attribute type.
4726 *
4727 * Note: the leading 'NOTATION' S part has already being parsed...
4728 *
4729 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4730 *
4731 * [ VC: Notation Attributes ]
4732 * Values of this type must match one of the notation names included
4733 * in the declaration; all notation names in the declaration must be declared.
4734 *
4735 * Returns: the notation attribute tree built while parsing
4736 */
4737
4738xmlEnumerationPtr
4739xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004741 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4742
4743 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004744 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 return(NULL);
4746 }
4747 SHRINK;
4748 do {
4749 NEXT;
4750 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004751 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004752 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004753 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4754 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004755 return(ret);
4756 }
4757 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004758 if (cur == NULL) return(ret);
4759 if (last == NULL) ret = last = cur;
4760 else {
4761 last->next = cur;
4762 last = cur;
4763 }
4764 SKIP_BLANKS;
4765 } while (RAW == '|');
4766 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004767 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004768 if ((last != NULL) && (last != ret))
4769 xmlFreeEnumeration(last);
4770 return(ret);
4771 }
4772 NEXT;
4773 return(ret);
4774}
4775
4776/**
4777 * xmlParseEnumerationType:
4778 * @ctxt: an XML parser context
4779 *
4780 * parse an Enumeration attribute type.
4781 *
4782 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4783 *
4784 * [ VC: Enumeration ]
4785 * Values of this type must match one of the Nmtoken tokens in
4786 * the declaration
4787 *
4788 * Returns: the enumeration attribute tree built while parsing
4789 */
4790
4791xmlEnumerationPtr
4792xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4793 xmlChar *name;
4794 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4795
4796 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004797 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004798 return(NULL);
4799 }
4800 SHRINK;
4801 do {
4802 NEXT;
4803 SKIP_BLANKS;
4804 name = xmlParseNmtoken(ctxt);
4805 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004806 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004807 return(ret);
4808 }
4809 cur = xmlCreateEnumeration(name);
4810 xmlFree(name);
4811 if (cur == NULL) return(ret);
4812 if (last == NULL) ret = last = cur;
4813 else {
4814 last->next = cur;
4815 last = cur;
4816 }
4817 SKIP_BLANKS;
4818 } while (RAW == '|');
4819 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004820 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(ret);
4822 }
4823 NEXT;
4824 return(ret);
4825}
4826
4827/**
4828 * xmlParseEnumeratedType:
4829 * @ctxt: an XML parser context
4830 * @tree: the enumeration tree built while parsing
4831 *
4832 * parse an Enumerated attribute type.
4833 *
4834 * [57] EnumeratedType ::= NotationType | Enumeration
4835 *
4836 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4837 *
4838 *
4839 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4840 */
4841
4842int
4843xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004844 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004845 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 return(0);
4850 }
4851 SKIP_BLANKS;
4852 *tree = xmlParseNotationType(ctxt);
4853 if (*tree == NULL) return(0);
4854 return(XML_ATTRIBUTE_NOTATION);
4855 }
4856 *tree = xmlParseEnumerationType(ctxt);
4857 if (*tree == NULL) return(0);
4858 return(XML_ATTRIBUTE_ENUMERATION);
4859}
4860
4861/**
4862 * xmlParseAttributeType:
4863 * @ctxt: an XML parser context
4864 * @tree: the enumeration tree built while parsing
4865 *
4866 * parse the Attribute list def for an element
4867 *
4868 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4869 *
4870 * [55] StringType ::= 'CDATA'
4871 *
4872 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4873 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4874 *
4875 * Validity constraints for attribute values syntax are checked in
4876 * xmlValidateAttributeValue()
4877 *
4878 * [ VC: ID ]
4879 * Values of type ID must match the Name production. A name must not
4880 * appear more than once in an XML document as a value of this type;
4881 * i.e., ID values must uniquely identify the elements which bear them.
4882 *
4883 * [ VC: One ID per Element Type ]
4884 * No element type may have more than one ID attribute specified.
4885 *
4886 * [ VC: ID Attribute Default ]
4887 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4888 *
4889 * [ VC: IDREF ]
4890 * Values of type IDREF must match the Name production, and values
4891 * of type IDREFS must match Names; each IDREF Name must match the value
4892 * of an ID attribute on some element in the XML document; i.e. IDREF
4893 * values must match the value of some ID attribute.
4894 *
4895 * [ VC: Entity Name ]
4896 * Values of type ENTITY must match the Name production, values
4897 * of type ENTITIES must match Names; each Entity Name must match the
4898 * name of an unparsed entity declared in the DTD.
4899 *
4900 * [ VC: Name Token ]
4901 * Values of type NMTOKEN must match the Nmtoken production; values
4902 * of type NMTOKENS must match Nmtokens.
4903 *
4904 * Returns the attribute type
4905 */
4906int
4907xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4908 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004909 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004910 SKIP(5);
4911 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004912 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004913 SKIP(6);
4914 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004915 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004916 SKIP(5);
4917 return(XML_ATTRIBUTE_IDREF);
4918 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4919 SKIP(2);
4920 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004921 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004922 SKIP(6);
4923 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004924 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004925 SKIP(8);
4926 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004927 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004928 SKIP(8);
4929 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004930 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004931 SKIP(7);
4932 return(XML_ATTRIBUTE_NMTOKEN);
4933 }
4934 return(xmlParseEnumeratedType(ctxt, tree));
4935}
4936
4937/**
4938 * xmlParseAttributeListDecl:
4939 * @ctxt: an XML parser context
4940 *
4941 * : parse the Attribute list def for an element
4942 *
4943 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4944 *
4945 * [53] AttDef ::= S Name S AttType S DefaultDecl
4946 *
4947 */
4948void
4949xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004950 const xmlChar *elemName;
4951 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004952 xmlEnumerationPtr tree;
4953
Daniel Veillarda07050d2003-10-19 14:46:32 +00004954 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004955 xmlParserInputPtr input = ctxt->input;
4956
4957 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004958 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004960 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004963 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004964 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004965 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4966 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 return;
4968 }
4969 SKIP_BLANKS;
4970 GROW;
4971 while (RAW != '>') {
4972 const xmlChar *check = CUR_PTR;
4973 int type;
4974 int def;
4975 xmlChar *defaultValue = NULL;
4976
4977 GROW;
4978 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004979 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004980 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004981 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4982 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004983 break;
4984 }
4985 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004986 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004987 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004988 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004989 break;
4990 }
4991 SKIP_BLANKS;
4992
4993 type = xmlParseAttributeType(ctxt, &tree);
4994 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004995 break;
4996 }
4997
4998 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004999 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 if (tree != NULL)
5003 xmlFreeEnumeration(tree);
5004 break;
5005 }
5006 SKIP_BLANKS;
5007
5008 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5009 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005010 if (defaultValue != NULL)
5011 xmlFree(defaultValue);
5012 if (tree != NULL)
5013 xmlFreeEnumeration(tree);
5014 break;
5015 }
5016
5017 GROW;
5018 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005019 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005021 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005022 if (defaultValue != NULL)
5023 xmlFree(defaultValue);
5024 if (tree != NULL)
5025 xmlFreeEnumeration(tree);
5026 break;
5027 }
5028 SKIP_BLANKS;
5029 }
5030 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005031 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5032 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005033 if (defaultValue != NULL)
5034 xmlFree(defaultValue);
5035 if (tree != NULL)
5036 xmlFreeEnumeration(tree);
5037 break;
5038 }
5039 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5040 (ctxt->sax->attributeDecl != NULL))
5041 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5042 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005043 else if (tree != NULL)
5044 xmlFreeEnumeration(tree);
5045
5046 if ((ctxt->sax2) && (defaultValue != NULL) &&
5047 (def != XML_ATTRIBUTE_IMPLIED) &&
5048 (def != XML_ATTRIBUTE_REQUIRED)) {
5049 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5050 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005051 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005052 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5053 }
Owen Taylor3473f882001-02-23 17:55:21 +00005054 if (defaultValue != NULL)
5055 xmlFree(defaultValue);
5056 GROW;
5057 }
5058 if (RAW == '>') {
5059 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005060 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5061 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005062 }
5063 NEXT;
5064 }
Owen Taylor3473f882001-02-23 17:55:21 +00005065 }
5066}
5067
5068/**
5069 * xmlParseElementMixedContentDecl:
5070 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005071 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005072 *
5073 * parse the declaration for a Mixed Element content
5074 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5075 *
5076 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5077 * '(' S? '#PCDATA' S? ')'
5078 *
5079 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5080 *
5081 * [ VC: No Duplicate Types ]
5082 * The same name must not appear more than once in a single
5083 * mixed-content declaration.
5084 *
5085 * returns: the list of the xmlElementContentPtr describing the element choices
5086 */
5087xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005088xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005089 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005090 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005091
5092 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005093 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 SKIP(7);
5095 SKIP_BLANKS;
5096 SHRINK;
5097 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005098 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005099 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5100"Element content declaration doesn't start and stop in the same entity\n",
5101 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005102 }
Owen Taylor3473f882001-02-23 17:55:21 +00005103 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005104 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005105 if (RAW == '*') {
5106 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5107 NEXT;
5108 }
5109 return(ret);
5110 }
5111 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005112 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005113 if (ret == NULL) return(NULL);
5114 }
5115 while (RAW == '|') {
5116 NEXT;
5117 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005118 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005119 if (ret == NULL) return(NULL);
5120 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005121 if (cur != NULL)
5122 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005123 cur = ret;
5124 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005125 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005127 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005128 if (n->c1 != NULL)
5129 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005130 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005131 if (n != NULL)
5132 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005133 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005134 }
5135 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005136 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005137 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005139 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005140 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005141 return(NULL);
5142 }
5143 SKIP_BLANKS;
5144 GROW;
5145 }
5146 if ((RAW == ')') && (NXT(1) == '*')) {
5147 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005148 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005149 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005150 if (cur->c2 != NULL)
5151 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
5153 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005154 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005155 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5156"Element content declaration doesn't start and stop in the same entity\n",
5157 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 SKIP(2);
5160 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005161 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005162 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 return(NULL);
5164 }
5165
5166 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005167 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005168 }
5169 return(ret);
5170}
5171
5172/**
5173 * xmlParseElementChildrenContentDecl:
5174 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005175 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005176 *
5177 * parse the declaration for a Mixed Element content
5178 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5179 *
5180 *
5181 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5182 *
5183 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5184 *
5185 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5186 *
5187 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5188 *
5189 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5190 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005191 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005192 * opening or closing parentheses in a choice, seq, or Mixed
5193 * construct is contained in the replacement text for a parameter
5194 * entity, both must be contained in the same replacement text. For
5195 * interoperability, if a parameter-entity reference appears in a
5196 * choice, seq, or Mixed construct, its replacement text should not
5197 * be empty, and neither the first nor last non-blank character of
5198 * the replacement text should be a connector (| or ,).
5199 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005200 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005201 * hierarchy.
5202 */
5203xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005204xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005205 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005206 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005207 xmlChar type = 0;
5208
5209 SKIP_BLANKS;
5210 GROW;
5211 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005212 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005213
Owen Taylor3473f882001-02-23 17:55:21 +00005214 /* Recurse on first child */
5215 NEXT;
5216 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005217 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005218 SKIP_BLANKS;
5219 GROW;
5220 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005221 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005222 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005223 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005224 return(NULL);
5225 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005226 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005227 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005229 return(NULL);
5230 }
Owen Taylor3473f882001-02-23 17:55:21 +00005231 GROW;
5232 if (RAW == '?') {
5233 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5234 NEXT;
5235 } else if (RAW == '*') {
5236 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5237 NEXT;
5238 } else if (RAW == '+') {
5239 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5240 NEXT;
5241 } else {
5242 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 GROW;
5245 }
5246 SKIP_BLANKS;
5247 SHRINK;
5248 while (RAW != ')') {
5249 /*
5250 * Each loop we parse one separator and one element.
5251 */
5252 if (RAW == ',') {
5253 if (type == 0) type = CUR;
5254
5255 /*
5256 * Detect "Name | Name , Name" error
5257 */
5258 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005260 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005261 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005262 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005263 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005264 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005265 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 return(NULL);
5267 }
5268 NEXT;
5269
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005272 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005273 xmlFreeDocElementContent(ctxt->myDoc, last);
5274 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005275 return(NULL);
5276 }
5277 if (last == NULL) {
5278 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005279 if (ret != NULL)
5280 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005281 ret = cur = op;
5282 } else {
5283 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005284 if (op != NULL)
5285 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005286 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005287 if (last != NULL)
5288 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005289 cur =op;
5290 last = NULL;
5291 }
5292 } else if (RAW == '|') {
5293 if (type == 0) type = CUR;
5294
5295 /*
5296 * Detect "Name , Name | Name" error
5297 */
5298 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005300 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005301 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005302 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005303 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005304 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005305 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005306 return(NULL);
5307 }
5308 NEXT;
5309
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005310 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005311 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005312 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005313 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005314 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005315 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 return(NULL);
5317 }
5318 if (last == NULL) {
5319 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005320 if (ret != NULL)
5321 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005322 ret = cur = op;
5323 } else {
5324 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005325 if (op != NULL)
5326 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005327 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005328 if (last != NULL)
5329 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005330 cur =op;
5331 last = NULL;
5332 }
5333 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005334 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005335 if ((last != NULL) && (last != ret))
5336 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005337 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005338 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005339 return(NULL);
5340 }
5341 GROW;
5342 SKIP_BLANKS;
5343 GROW;
5344 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005345 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005346 /* Recurse on second child */
5347 NEXT;
5348 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005349 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005350 SKIP_BLANKS;
5351 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005352 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005353 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005354 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005355 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005356 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005357 return(NULL);
5358 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005359 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 if (RAW == '?') {
5361 last->ocur = XML_ELEMENT_CONTENT_OPT;
5362 NEXT;
5363 } else if (RAW == '*') {
5364 last->ocur = XML_ELEMENT_CONTENT_MULT;
5365 NEXT;
5366 } else if (RAW == '+') {
5367 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5368 NEXT;
5369 } else {
5370 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5371 }
5372 }
5373 SKIP_BLANKS;
5374 GROW;
5375 }
5376 if ((cur != NULL) && (last != NULL)) {
5377 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005378 if (last != NULL)
5379 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005380 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005381 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005382 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5383"Element content declaration doesn't start and stop in the same entity\n",
5384 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005385 }
Owen Taylor3473f882001-02-23 17:55:21 +00005386 NEXT;
5387 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005388 if (ret != NULL) {
5389 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5390 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5391 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5392 else
5393 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5394 }
Owen Taylor3473f882001-02-23 17:55:21 +00005395 NEXT;
5396 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005397 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005398 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005399 cur = ret;
5400 /*
5401 * Some normalization:
5402 * (a | b* | c?)* == (a | b | c)*
5403 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005404 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005405 if ((cur->c1 != NULL) &&
5406 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5407 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5408 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5409 if ((cur->c2 != NULL) &&
5410 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5411 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5412 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5413 cur = cur->c2;
5414 }
5415 }
Owen Taylor3473f882001-02-23 17:55:21 +00005416 NEXT;
5417 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005418 if (ret != NULL) {
5419 int found = 0;
5420
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005421 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5422 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5423 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005424 else
5425 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005426 /*
5427 * Some normalization:
5428 * (a | b*)+ == (a | b)*
5429 * (a | b?)+ == (a | b)*
5430 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005431 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005432 if ((cur->c1 != NULL) &&
5433 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5434 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5435 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5436 found = 1;
5437 }
5438 if ((cur->c2 != NULL) &&
5439 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5440 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5441 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5442 found = 1;
5443 }
5444 cur = cur->c2;
5445 }
5446 if (found)
5447 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5448 }
Owen Taylor3473f882001-02-23 17:55:21 +00005449 NEXT;
5450 }
5451 return(ret);
5452}
5453
5454/**
5455 * xmlParseElementContentDecl:
5456 * @ctxt: an XML parser context
5457 * @name: the name of the element being defined.
5458 * @result: the Element Content pointer will be stored here if any
5459 *
5460 * parse the declaration for an Element content either Mixed or Children,
5461 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5462 *
5463 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5464 *
5465 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5466 */
5467
5468int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005469xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005470 xmlElementContentPtr *result) {
5471
5472 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005474 int res;
5475
5476 *result = NULL;
5477
5478 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005479 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005480 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005481 return(-1);
5482 }
5483 NEXT;
5484 GROW;
5485 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005486 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005488 res = XML_ELEMENT_TYPE_MIXED;
5489 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005490 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005491 res = XML_ELEMENT_TYPE_ELEMENT;
5492 }
Owen Taylor3473f882001-02-23 17:55:21 +00005493 SKIP_BLANKS;
5494 *result = tree;
5495 return(res);
5496}
5497
5498/**
5499 * xmlParseElementDecl:
5500 * @ctxt: an XML parser context
5501 *
5502 * parse an Element declaration.
5503 *
5504 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5505 *
5506 * [ VC: Unique Element Type Declaration ]
5507 * No element type may be declared more than once
5508 *
5509 * Returns the type of the element, or -1 in case of error
5510 */
5511int
5512xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005513 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005514 int ret = -1;
5515 xmlElementContentPtr content = NULL;
5516
Daniel Veillard4c778d82005-01-23 17:37:44 +00005517 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005518 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005519 xmlParserInputPtr input = ctxt->input;
5520
5521 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005522 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005523 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5524 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005525 }
5526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005527 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005528 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005529 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5530 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005531 return(-1);
5532 }
5533 while ((RAW == 0) && (ctxt->inputNr > 1))
5534 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005535 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005540 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005541 SKIP(5);
5542 /*
5543 * Element must always be empty.
5544 */
5545 ret = XML_ELEMENT_TYPE_EMPTY;
5546 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5547 (NXT(2) == 'Y')) {
5548 SKIP(3);
5549 /*
5550 * Element is a generic container.
5551 */
5552 ret = XML_ELEMENT_TYPE_ANY;
5553 } else if (RAW == '(') {
5554 ret = xmlParseElementContentDecl(ctxt, name, &content);
5555 } else {
5556 /*
5557 * [ WFC: PEs in Internal Subset ] error handling.
5558 */
5559 if ((RAW == '%') && (ctxt->external == 0) &&
5560 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005561 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005562 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005563 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005564 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005565 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5566 }
Owen Taylor3473f882001-02-23 17:55:21 +00005567 return(-1);
5568 }
5569
5570 SKIP_BLANKS;
5571 /*
5572 * Pop-up of finished entities.
5573 */
5574 while ((RAW == 0) && (ctxt->inputNr > 1))
5575 xmlPopInput(ctxt);
5576 SKIP_BLANKS;
5577
5578 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005579 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005580 if (content != NULL) {
5581 xmlFreeDocElementContent(ctxt->myDoc, content);
5582 }
Owen Taylor3473f882001-02-23 17:55:21 +00005583 } else {
5584 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5586 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
5588
5589 NEXT;
5590 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005591 (ctxt->sax->elementDecl != NULL)) {
5592 if (content != NULL)
5593 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005594 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5595 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005596 if ((content != NULL) && (content->parent == NULL)) {
5597 /*
5598 * this is a trick: if xmlAddElementDecl is called,
5599 * instead of copying the full tree it is plugged directly
5600 * if called from the parser. Avoid duplicating the
5601 * interfaces or change the API/ABI
5602 */
5603 xmlFreeDocElementContent(ctxt->myDoc, content);
5604 }
5605 } else if (content != NULL) {
5606 xmlFreeDocElementContent(ctxt->myDoc, content);
5607 }
Owen Taylor3473f882001-02-23 17:55:21 +00005608 }
Owen Taylor3473f882001-02-23 17:55:21 +00005609 }
5610 return(ret);
5611}
5612
5613/**
Owen Taylor3473f882001-02-23 17:55:21 +00005614 * xmlParseConditionalSections
5615 * @ctxt: an XML parser context
5616 *
5617 * [61] conditionalSect ::= includeSect | ignoreSect
5618 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5619 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5620 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5621 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5622 */
5623
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005624static void
Owen Taylor3473f882001-02-23 17:55:21 +00005625xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5626 SKIP(3);
5627 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005628 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005629 SKIP(7);
5630 SKIP_BLANKS;
5631 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005632 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005633 } else {
5634 NEXT;
5635 }
5636 if (xmlParserDebugEntities) {
5637 if ((ctxt->input != NULL) && (ctxt->input->filename))
5638 xmlGenericError(xmlGenericErrorContext,
5639 "%s(%d): ", ctxt->input->filename,
5640 ctxt->input->line);
5641 xmlGenericError(xmlGenericErrorContext,
5642 "Entering INCLUDE Conditional Section\n");
5643 }
5644
5645 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5646 (NXT(2) != '>'))) {
5647 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005648 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005649
5650 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5651 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005652 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005653 NEXT;
5654 } else if (RAW == '%') {
5655 xmlParsePEReference(ctxt);
5656 } else
5657 xmlParseMarkupDecl(ctxt);
5658
5659 /*
5660 * Pop-up of finished entities.
5661 */
5662 while ((RAW == 0) && (ctxt->inputNr > 1))
5663 xmlPopInput(ctxt);
5664
Daniel Veillardfdc91562002-07-01 21:52:03 +00005665 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005666 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005667 break;
5668 }
5669 }
5670 if (xmlParserDebugEntities) {
5671 if ((ctxt->input != NULL) && (ctxt->input->filename))
5672 xmlGenericError(xmlGenericErrorContext,
5673 "%s(%d): ", ctxt->input->filename,
5674 ctxt->input->line);
5675 xmlGenericError(xmlGenericErrorContext,
5676 "Leaving INCLUDE Conditional Section\n");
5677 }
5678
Daniel Veillarda07050d2003-10-19 14:46:32 +00005679 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005680 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005681 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005682 int depth = 0;
5683
5684 SKIP(6);
5685 SKIP_BLANKS;
5686 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005687 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005688 } else {
5689 NEXT;
5690 }
5691 if (xmlParserDebugEntities) {
5692 if ((ctxt->input != NULL) && (ctxt->input->filename))
5693 xmlGenericError(xmlGenericErrorContext,
5694 "%s(%d): ", ctxt->input->filename,
5695 ctxt->input->line);
5696 xmlGenericError(xmlGenericErrorContext,
5697 "Entering IGNORE Conditional Section\n");
5698 }
5699
5700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005701 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005702 * But disable SAX event generating DTD building in the meantime
5703 */
5704 state = ctxt->disableSAX;
5705 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005707 ctxt->instate = XML_PARSER_IGNORE;
5708
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005709 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005710 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5711 depth++;
5712 SKIP(3);
5713 continue;
5714 }
5715 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5716 if (--depth >= 0) SKIP(3);
5717 continue;
5718 }
5719 NEXT;
5720 continue;
5721 }
5722
5723 ctxt->disableSAX = state;
5724 ctxt->instate = instate;
5725
5726 if (xmlParserDebugEntities) {
5727 if ((ctxt->input != NULL) && (ctxt->input->filename))
5728 xmlGenericError(xmlGenericErrorContext,
5729 "%s(%d): ", ctxt->input->filename,
5730 ctxt->input->line);
5731 xmlGenericError(xmlGenericErrorContext,
5732 "Leaving IGNORE Conditional Section\n");
5733 }
5734
5735 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005736 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 }
5738
5739 if (RAW == 0)
5740 SHRINK;
5741
5742 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005743 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 } else {
5745 SKIP(3);
5746 }
5747}
5748
5749/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005750 * xmlParseMarkupDecl:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse Markup declarations
5754 *
5755 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5756 * NotationDecl | PI | Comment
5757 *
5758 * [ VC: Proper Declaration/PE Nesting ]
5759 * Parameter-entity replacement text must be properly nested with
5760 * markup declarations. That is to say, if either the first character
5761 * or the last character of a markup declaration (markupdecl above) is
5762 * contained in the replacement text for a parameter-entity reference,
5763 * both must be contained in the same replacement text.
5764 *
5765 * [ WFC: PEs in Internal Subset ]
5766 * In the internal DTD subset, parameter-entity references can occur
5767 * only where markup declarations can occur, not within markup declarations.
5768 * (This does not apply to references that occur in external parameter
5769 * entities or to the external subset.)
5770 */
5771void
5772xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5773 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005774 if (CUR == '<') {
5775 if (NXT(1) == '!') {
5776 switch (NXT(2)) {
5777 case 'E':
5778 if (NXT(3) == 'L')
5779 xmlParseElementDecl(ctxt);
5780 else if (NXT(3) == 'N')
5781 xmlParseEntityDecl(ctxt);
5782 break;
5783 case 'A':
5784 xmlParseAttributeListDecl(ctxt);
5785 break;
5786 case 'N':
5787 xmlParseNotationDecl(ctxt);
5788 break;
5789 case '-':
5790 xmlParseComment(ctxt);
5791 break;
5792 default:
5793 /* there is an error but it will be detected later */
5794 break;
5795 }
5796 } else if (NXT(1) == '?') {
5797 xmlParsePI(ctxt);
5798 }
5799 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005800 /*
5801 * This is only for internal subset. On external entities,
5802 * the replacement is done before parsing stage
5803 */
5804 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5805 xmlParsePEReference(ctxt);
5806
5807 /*
5808 * Conditional sections are allowed from entities included
5809 * by PE References in the internal subset.
5810 */
5811 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5812 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5813 xmlParseConditionalSections(ctxt);
5814 }
5815 }
5816
5817 ctxt->instate = XML_PARSER_DTD;
5818}
5819
5820/**
5821 * xmlParseTextDecl:
5822 * @ctxt: an XML parser context
5823 *
5824 * parse an XML declaration header for external entities
5825 *
5826 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5827 *
5828 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5829 */
5830
5831void
5832xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5833 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005834 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005835
5836 /*
5837 * We know that '<?xml' is here.
5838 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005839 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005840 SKIP(5);
5841 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005842 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005843 return;
5844 }
5845
William M. Brack76e95df2003-10-18 16:20:14 +00005846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5848 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005849 }
5850 SKIP_BLANKS;
5851
5852 /*
5853 * We may have the VersionInfo here.
5854 */
5855 version = xmlParseVersionInfo(ctxt);
5856 if (version == NULL)
5857 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005858 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005859 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005860 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5861 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005862 }
5863 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005864 ctxt->input->version = version;
5865
5866 /*
5867 * We must have the encoding declaration
5868 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005869 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005870 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5871 /*
5872 * The XML REC instructs us to stop parsing right here
5873 */
5874 return;
5875 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005876 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5877 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5878 "Missing encoding in text declaration\n");
5879 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005880
5881 SKIP_BLANKS;
5882 if ((RAW == '?') && (NXT(1) == '>')) {
5883 SKIP(2);
5884 } else if (RAW == '>') {
5885 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005886 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005887 NEXT;
5888 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005889 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005890 MOVETO_ENDTAG(CUR_PTR);
5891 NEXT;
5892 }
5893}
5894
5895/**
Owen Taylor3473f882001-02-23 17:55:21 +00005896 * xmlParseExternalSubset:
5897 * @ctxt: an XML parser context
5898 * @ExternalID: the external identifier
5899 * @SystemID: the system identifier (or URL)
5900 *
5901 * parse Markup declarations from an external subset
5902 *
5903 * [30] extSubset ::= textDecl? extSubsetDecl
5904 *
5905 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5906 */
5907void
5908xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5909 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005910 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005912 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005913 xmlParseTextDecl(ctxt);
5914 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5915 /*
5916 * The XML REC instructs us to stop parsing right here
5917 */
5918 ctxt->instate = XML_PARSER_EOF;
5919 return;
5920 }
5921 }
5922 if (ctxt->myDoc == NULL) {
5923 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5924 }
5925 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5926 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5927
5928 ctxt->instate = XML_PARSER_DTD;
5929 ctxt->external = 1;
5930 while (((RAW == '<') && (NXT(1) == '?')) ||
5931 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005932 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005933 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005934 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005935
5936 GROW;
5937 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5938 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005939 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005940 NEXT;
5941 } else if (RAW == '%') {
5942 xmlParsePEReference(ctxt);
5943 } else
5944 xmlParseMarkupDecl(ctxt);
5945
5946 /*
5947 * Pop-up of finished entities.
5948 */
5949 while ((RAW == 0) && (ctxt->inputNr > 1))
5950 xmlPopInput(ctxt);
5951
Daniel Veillardfdc91562002-07-01 21:52:03 +00005952 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005953 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005954 break;
5955 }
5956 }
5957
5958 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005959 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 }
5961
5962}
5963
5964/**
5965 * xmlParseReference:
5966 * @ctxt: an XML parser context
5967 *
5968 * parse and handle entity references in content, depending on the SAX
5969 * interface, this may end-up in a call to character() if this is a
5970 * CharRef, a predefined entity, if there is no reference() callback.
5971 * or if the parser was asked to switch to that mode.
5972 *
5973 * [67] Reference ::= EntityRef | CharRef
5974 */
5975void
5976xmlParseReference(xmlParserCtxtPtr ctxt) {
5977 xmlEntityPtr ent;
5978 xmlChar *val;
5979 if (RAW != '&') return;
5980
5981 if (NXT(1) == '#') {
5982 int i = 0;
5983 xmlChar out[10];
5984 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005985 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005986
5987 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5988 /*
5989 * So we are using non-UTF-8 buffers
5990 * Check that the char fit on 8bits, if not
5991 * generate a CharRef.
5992 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005993 if (value <= 0xFF) {
5994 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005995 out[1] = 0;
5996 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5997 (!ctxt->disableSAX))
5998 ctxt->sax->characters(ctxt->userData, out, 1);
5999 } else {
6000 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006001 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006002 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006003 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006004 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6005 (!ctxt->disableSAX))
6006 ctxt->sax->reference(ctxt->userData, out);
6007 }
6008 } else {
6009 /*
6010 * Just encode the value in UTF-8
6011 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006012 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006013 out[i] = 0;
6014 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6015 (!ctxt->disableSAX))
6016 ctxt->sax->characters(ctxt->userData, out, i);
6017 }
6018 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006019 int was_checked;
6020
Owen Taylor3473f882001-02-23 17:55:21 +00006021 ent = xmlParseEntityRef(ctxt);
6022 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006023 if (!ctxt->wellFormed)
6024 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006025 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006026 if ((ent->name != NULL) &&
6027 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6028 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006029 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006030
6031
6032 /*
6033 * The first reference to the entity trigger a parsing phase
6034 * where the ent->children is filled with the result from
6035 * the parsing.
6036 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006037 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006038 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006039
Owen Taylor3473f882001-02-23 17:55:21 +00006040 value = ent->content;
6041
6042 /*
6043 * Check that this entity is well formed
6044 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006045 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006046 (value[1] == 0) && (value[0] == '<') &&
6047 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6048 /*
6049 * DONE: get definite answer on this !!!
6050 * Lots of entity decls are used to declare a single
6051 * char
6052 * <!ENTITY lt "<">
6053 * Which seems to be valid since
6054 * 2.4: The ampersand character (&) and the left angle
6055 * bracket (<) may appear in their literal form only
6056 * when used ... They are also legal within the literal
6057 * entity value of an internal entity declaration;i
6058 * see "4.3.2 Well-Formed Parsed Entities".
6059 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6060 * Looking at the OASIS test suite and James Clark
6061 * tests, this is broken. However the XML REC uses
6062 * it. Is the XML REC not well-formed ????
6063 * This is a hack to avoid this problem
6064 *
6065 * ANSWER: since lt gt amp .. are already defined,
6066 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006067 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006068 * is lousy but acceptable.
6069 */
6070 list = xmlNewDocText(ctxt->myDoc, value);
6071 if (list != NULL) {
6072 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6073 (ent->children == NULL)) {
6074 ent->children = list;
6075 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006076 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006077 list->parent = (xmlNodePtr) ent;
6078 } else {
6079 xmlFreeNodeList(list);
6080 }
6081 } else if (list != NULL) {
6082 xmlFreeNodeList(list);
6083 }
6084 } else {
6085 /*
6086 * 4.3.2: An internal general parsed entity is well-formed
6087 * if its replacement text matches the production labeled
6088 * content.
6089 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006090
6091 void *user_data;
6092 /*
6093 * This is a bit hackish but this seems the best
6094 * way to make sure both SAX and DOM entity support
6095 * behaves okay.
6096 */
6097 if (ctxt->userData == ctxt)
6098 user_data = NULL;
6099 else
6100 user_data = ctxt->userData;
6101
Owen Taylor3473f882001-02-23 17:55:21 +00006102 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6103 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006104 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6105 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006106 ctxt->depth--;
6107 } else if (ent->etype ==
6108 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6109 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006110 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006111 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006112 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 ctxt->depth--;
6114 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006115 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006116 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6117 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
6119 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006120 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006121 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006122 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006123 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6124 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006125 (ent->children == NULL)) {
6126 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006127 if (ctxt->replaceEntities) {
6128 /*
6129 * Prune it directly in the generated document
6130 * except for single text nodes.
6131 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006132 if (((list->type == XML_TEXT_NODE) &&
6133 (list->next == NULL)) ||
6134 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006135 list->parent = (xmlNodePtr) ent;
6136 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006137 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006138 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006139 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006140 while (list != NULL) {
6141 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006142 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006143 if (list->next == NULL)
6144 ent->last = list;
6145 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006146 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006147 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006148#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006149 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6150 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006151#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006152 }
6153 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006154 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006155 while (list != NULL) {
6156 list->parent = (xmlNodePtr) ent;
6157 if (list->next == NULL)
6158 ent->last = list;
6159 list = list->next;
6160 }
Owen Taylor3473f882001-02-23 17:55:21 +00006161 }
6162 } else {
6163 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006164 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006166 } else if ((ret != XML_ERR_OK) &&
6167 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006168 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6169 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006170 } else if (list != NULL) {
6171 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006172 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006173 }
6174 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006175 ent->checked = 1;
6176 }
6177
6178 if (ent->children == NULL) {
6179 /*
6180 * Probably running in SAX mode and the callbacks don't
6181 * build the entity content. So unless we already went
6182 * though parsing for first checking go though the entity
6183 * content to generate callbacks associated to the entity
6184 */
6185 if (was_checked == 1) {
6186 void *user_data;
6187 /*
6188 * This is a bit hackish but this seems the best
6189 * way to make sure both SAX and DOM entity support
6190 * behaves okay.
6191 */
6192 if (ctxt->userData == ctxt)
6193 user_data = NULL;
6194 else
6195 user_data = ctxt->userData;
6196
6197 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6198 ctxt->depth++;
6199 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6200 ent->content, user_data, NULL);
6201 ctxt->depth--;
6202 } else if (ent->etype ==
6203 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6204 ctxt->depth++;
6205 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6206 ctxt->sax, user_data, ctxt->depth,
6207 ent->URI, ent->ExternalID, NULL);
6208 ctxt->depth--;
6209 } else {
6210 ret = XML_ERR_ENTITY_PE_INTERNAL;
6211 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6212 "invalid entity type found\n", NULL);
6213 }
6214 if (ret == XML_ERR_ENTITY_LOOP) {
6215 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6216 return;
6217 }
6218 }
6219 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6220 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6221 /*
6222 * Entity reference callback comes second, it's somewhat
6223 * superfluous but a compatibility to historical behaviour
6224 */
6225 ctxt->sax->reference(ctxt->userData, ent->name);
6226 }
6227 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006228 }
6229 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006230 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006231 /*
6232 * Create a node.
6233 */
6234 ctxt->sax->reference(ctxt->userData, ent->name);
6235 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006236 }
6237 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006238 /*
6239 * There is a problem on the handling of _private for entities
6240 * (bug 155816): Should we copy the content of the field from
6241 * the entity (possibly overwriting some value set by the user
6242 * when a copy is created), should we leave it alone, or should
6243 * we try to take care of different situations? The problem
6244 * is exacerbated by the usage of this field by the xmlReader.
6245 * To fix this bug, we look at _private on the created node
6246 * and, if it's NULL, we copy in whatever was in the entity.
6247 * If it's not NULL we leave it alone. This is somewhat of a
6248 * hack - maybe we should have further tests to determine
6249 * what to do.
6250 */
Owen Taylor3473f882001-02-23 17:55:21 +00006251 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6252 /*
6253 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006254 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006255 * In the first occurrence list contains the replacement.
6256 * progressive == 2 means we are operating on the Reader
6257 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006258 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006259 if (((list == NULL) && (ent->owner == 0)) ||
6260 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006261 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006262
6263 /*
6264 * when operating on a reader, the entities definitions
6265 * are always owning the entities subtree.
6266 if (ctxt->parseMode == XML_PARSE_READER)
6267 ent->owner = 1;
6268 */
6269
Daniel Veillard62f313b2001-07-04 19:49:14 +00006270 cur = ent->children;
6271 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006272 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006273 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006274 if (nw->_private == NULL)
6275 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006276 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006277 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006278 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006279 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006280 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006281 if (cur == ent->last) {
6282 /*
6283 * needed to detect some strange empty
6284 * node cases in the reader tests
6285 */
6286 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006287 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006288 (nw->type == XML_ELEMENT_NODE) &&
6289 (nw->children == NULL))
6290 nw->extra = 1;
6291
Daniel Veillard62f313b2001-07-04 19:49:14 +00006292 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006293 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006294 cur = cur->next;
6295 }
Daniel Veillard81273902003-09-30 00:43:48 +00006296#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006297 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006298 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006299#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006300 } else if (list == NULL) {
6301 xmlNodePtr nw = NULL, cur, next, last,
6302 firstChild = NULL;
6303 /*
6304 * Copy the entity child list and make it the new
6305 * entity child list. The goal is to make sure any
6306 * ID or REF referenced will be the one from the
6307 * document content and not the entity copy.
6308 */
6309 cur = ent->children;
6310 ent->children = NULL;
6311 last = ent->last;
6312 ent->last = NULL;
6313 while (cur != NULL) {
6314 next = cur->next;
6315 cur->next = NULL;
6316 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006317 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006318 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006319 if (nw->_private == NULL)
6320 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006321 if (firstChild == NULL){
6322 firstChild = cur;
6323 }
6324 xmlAddChild((xmlNodePtr) ent, nw);
6325 xmlAddChild(ctxt->node, cur);
6326 }
6327 if (cur == last)
6328 break;
6329 cur = next;
6330 }
6331 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006332#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006333 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6334 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006335#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006336 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006337 const xmlChar *nbktext;
6338
Daniel Veillard62f313b2001-07-04 19:49:14 +00006339 /*
6340 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006341 * node with a possible previous text one which
6342 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006343 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006344 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6345 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006346 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006347 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006348 if ((ent->last != ent->children) &&
6349 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006350 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006351 xmlAddChildList(ctxt->node, ent->children);
6352 }
6353
Owen Taylor3473f882001-02-23 17:55:21 +00006354 /*
6355 * This is to avoid a nasty side effect, see
6356 * characters() in SAX.c
6357 */
6358 ctxt->nodemem = 0;
6359 ctxt->nodelen = 0;
6360 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 }
6363 } else {
6364 val = ent->content;
6365 if (val == NULL) return;
6366 /*
6367 * inline the entity.
6368 */
6369 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6370 (!ctxt->disableSAX))
6371 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6372 }
6373 }
6374}
6375
6376/**
6377 * xmlParseEntityRef:
6378 * @ctxt: an XML parser context
6379 *
6380 * parse ENTITY references declarations
6381 *
6382 * [68] EntityRef ::= '&' Name ';'
6383 *
6384 * [ WFC: Entity Declared ]
6385 * In a document without any DTD, a document with only an internal DTD
6386 * subset which contains no parameter entity references, or a document
6387 * with "standalone='yes'", the Name given in the entity reference
6388 * must match that in an entity declaration, except that well-formed
6389 * documents need not declare any of the following entities: amp, lt,
6390 * gt, apos, quot. The declaration of a parameter entity must precede
6391 * any reference to it. Similarly, the declaration of a general entity
6392 * must precede any reference to it which appears in a default value in an
6393 * attribute-list declaration. Note that if entities are declared in the
6394 * external subset or in external parameter entities, a non-validating
6395 * processor is not obligated to read and process their declarations;
6396 * for such documents, the rule that an entity must be declared is a
6397 * well-formedness constraint only if standalone='yes'.
6398 *
6399 * [ WFC: Parsed Entity ]
6400 * An entity reference must not contain the name of an unparsed entity
6401 *
6402 * Returns the xmlEntityPtr if found, or NULL otherwise.
6403 */
6404xmlEntityPtr
6405xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006406 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006407 xmlEntityPtr ent = NULL;
6408
6409 GROW;
6410
6411 if (RAW == '&') {
6412 NEXT;
6413 name = xmlParseName(ctxt);
6414 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006415 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6416 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006417 } else {
6418 if (RAW == ';') {
6419 NEXT;
6420 /*
6421 * Ask first SAX for entity resolution, otherwise try the
6422 * predefined set.
6423 */
6424 if (ctxt->sax != NULL) {
6425 if (ctxt->sax->getEntity != NULL)
6426 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006427 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006428 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006429 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6430 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006431 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006432 }
Owen Taylor3473f882001-02-23 17:55:21 +00006433 }
6434 /*
6435 * [ WFC: Entity Declared ]
6436 * In a document without any DTD, a document with only an
6437 * internal DTD subset which contains no parameter entity
6438 * references, or a document with "standalone='yes'", the
6439 * Name given in the entity reference must match that in an
6440 * entity declaration, except that well-formed documents
6441 * need not declare any of the following entities: amp, lt,
6442 * gt, apos, quot.
6443 * The declaration of a parameter entity must precede any
6444 * reference to it.
6445 * Similarly, the declaration of a general entity must
6446 * precede any reference to it which appears in a default
6447 * value in an attribute-list declaration. Note that if
6448 * entities are declared in the external subset or in
6449 * external parameter entities, a non-validating processor
6450 * is not obligated to read and process their declarations;
6451 * for such documents, the rule that an entity must be
6452 * declared is a well-formedness constraint only if
6453 * standalone='yes'.
6454 */
6455 if (ent == NULL) {
6456 if ((ctxt->standalone == 1) ||
6457 ((ctxt->hasExternalSubset == 0) &&
6458 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006459 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006460 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006461 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006462 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006463 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006464 if ((ctxt->inSubset == 0) &&
6465 (ctxt->sax != NULL) &&
6466 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006467 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006468 }
Owen Taylor3473f882001-02-23 17:55:21 +00006469 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006470 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006471 }
6472
6473 /*
6474 * [ WFC: Parsed Entity ]
6475 * An entity reference must not contain the name of an
6476 * unparsed entity
6477 */
6478 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006479 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006480 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 }
6482
6483 /*
6484 * [ WFC: No External Entity References ]
6485 * Attribute values cannot contain direct or indirect
6486 * entity references to external entities.
6487 */
6488 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6489 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006490 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6491 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 }
6493 /*
6494 * [ WFC: No < in Attribute Values ]
6495 * The replacement text of any entity referred to directly or
6496 * indirectly in an attribute value (other than "&lt;") must
6497 * not contain a <.
6498 */
6499 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6500 (ent != NULL) &&
6501 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6502 (ent->content != NULL) &&
6503 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006504 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006505 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006506 }
6507
6508 /*
6509 * Internal check, no parameter entities here ...
6510 */
6511 else {
6512 switch (ent->etype) {
6513 case XML_INTERNAL_PARAMETER_ENTITY:
6514 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006515 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6516 "Attempt to reference the parameter entity '%s'\n",
6517 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006518 break;
6519 default:
6520 break;
6521 }
6522 }
6523
6524 /*
6525 * [ WFC: No Recursion ]
6526 * A parsed entity must not contain a recursive reference
6527 * to itself, either directly or indirectly.
6528 * Done somewhere else
6529 */
6530
6531 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006532 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006533 }
Owen Taylor3473f882001-02-23 17:55:21 +00006534 }
6535 }
6536 return(ent);
6537}
6538
6539/**
6540 * xmlParseStringEntityRef:
6541 * @ctxt: an XML parser context
6542 * @str: a pointer to an index in the string
6543 *
6544 * parse ENTITY references declarations, but this version parses it from
6545 * a string value.
6546 *
6547 * [68] EntityRef ::= '&' Name ';'
6548 *
6549 * [ WFC: Entity Declared ]
6550 * In a document without any DTD, a document with only an internal DTD
6551 * subset which contains no parameter entity references, or a document
6552 * with "standalone='yes'", the Name given in the entity reference
6553 * must match that in an entity declaration, except that well-formed
6554 * documents need not declare any of the following entities: amp, lt,
6555 * gt, apos, quot. The declaration of a parameter entity must precede
6556 * any reference to it. Similarly, the declaration of a general entity
6557 * must precede any reference to it which appears in a default value in an
6558 * attribute-list declaration. Note that if entities are declared in the
6559 * external subset or in external parameter entities, a non-validating
6560 * processor is not obligated to read and process their declarations;
6561 * for such documents, the rule that an entity must be declared is a
6562 * well-formedness constraint only if standalone='yes'.
6563 *
6564 * [ WFC: Parsed Entity ]
6565 * An entity reference must not contain the name of an unparsed entity
6566 *
6567 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6568 * is updated to the current location in the string.
6569 */
6570xmlEntityPtr
6571xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6572 xmlChar *name;
6573 const xmlChar *ptr;
6574 xmlChar cur;
6575 xmlEntityPtr ent = NULL;
6576
6577 if ((str == NULL) || (*str == NULL))
6578 return(NULL);
6579 ptr = *str;
6580 cur = *ptr;
6581 if (cur == '&') {
6582 ptr++;
6583 cur = *ptr;
6584 name = xmlParseStringName(ctxt, &ptr);
6585 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006586 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6587 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006588 } else {
6589 if (*ptr == ';') {
6590 ptr++;
6591 /*
6592 * Ask first SAX for entity resolution, otherwise try the
6593 * predefined set.
6594 */
6595 if (ctxt->sax != NULL) {
6596 if (ctxt->sax->getEntity != NULL)
6597 ent = ctxt->sax->getEntity(ctxt->userData, name);
6598 if (ent == NULL)
6599 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006600 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006601 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006602 }
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604 /*
6605 * [ WFC: Entity Declared ]
6606 * In a document without any DTD, a document with only an
6607 * internal DTD subset which contains no parameter entity
6608 * references, or a document with "standalone='yes'", the
6609 * Name given in the entity reference must match that in an
6610 * entity declaration, except that well-formed documents
6611 * need not declare any of the following entities: amp, lt,
6612 * gt, apos, quot.
6613 * The declaration of a parameter entity must precede any
6614 * reference to it.
6615 * Similarly, the declaration of a general entity must
6616 * precede any reference to it which appears in a default
6617 * value in an attribute-list declaration. Note that if
6618 * entities are declared in the external subset or in
6619 * external parameter entities, a non-validating processor
6620 * is not obligated to read and process their declarations;
6621 * for such documents, the rule that an entity must be
6622 * declared is a well-formedness constraint only if
6623 * standalone='yes'.
6624 */
6625 if (ent == NULL) {
6626 if ((ctxt->standalone == 1) ||
6627 ((ctxt->hasExternalSubset == 0) &&
6628 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006629 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006630 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006631 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006632 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006633 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006634 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006635 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006636 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006637 }
6638
6639 /*
6640 * [ WFC: Parsed Entity ]
6641 * An entity reference must not contain the name of an
6642 * unparsed entity
6643 */
6644 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006645 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006646 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 }
6648
6649 /*
6650 * [ WFC: No External Entity References ]
6651 * Attribute values cannot contain direct or indirect
6652 * entity references to external entities.
6653 */
6654 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6655 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006656 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006657 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659 /*
6660 * [ WFC: No < in Attribute Values ]
6661 * The replacement text of any entity referred to directly or
6662 * indirectly in an attribute value (other than "&lt;") must
6663 * not contain a <.
6664 */
6665 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6666 (ent != NULL) &&
6667 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6668 (ent->content != NULL) &&
6669 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006670 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6671 "'<' in entity '%s' is not allowed in attributes values\n",
6672 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 }
6674
6675 /*
6676 * Internal check, no parameter entities here ...
6677 */
6678 else {
6679 switch (ent->etype) {
6680 case XML_INTERNAL_PARAMETER_ENTITY:
6681 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006682 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6683 "Attempt to reference the parameter entity '%s'\n",
6684 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 break;
6686 default:
6687 break;
6688 }
6689 }
6690
6691 /*
6692 * [ WFC: No Recursion ]
6693 * A parsed entity must not contain a recursive reference
6694 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006695 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006696 */
6697
6698 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006699 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006700 }
6701 xmlFree(name);
6702 }
6703 }
6704 *str = ptr;
6705 return(ent);
6706}
6707
6708/**
6709 * xmlParsePEReference:
6710 * @ctxt: an XML parser context
6711 *
6712 * parse PEReference declarations
6713 * The entity content is handled directly by pushing it's content as
6714 * a new input stream.
6715 *
6716 * [69] PEReference ::= '%' Name ';'
6717 *
6718 * [ WFC: No Recursion ]
6719 * A parsed entity must not contain a recursive
6720 * reference to itself, either directly or indirectly.
6721 *
6722 * [ WFC: Entity Declared ]
6723 * In a document without any DTD, a document with only an internal DTD
6724 * subset which contains no parameter entity references, or a document
6725 * with "standalone='yes'", ... ... The declaration of a parameter
6726 * entity must precede any reference to it...
6727 *
6728 * [ VC: Entity Declared ]
6729 * In a document with an external subset or external parameter entities
6730 * with "standalone='no'", ... ... The declaration of a parameter entity
6731 * must precede any reference to it...
6732 *
6733 * [ WFC: In DTD ]
6734 * Parameter-entity references may only appear in the DTD.
6735 * NOTE: misleading but this is handled.
6736 */
6737void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006738xmlParsePEReference(xmlParserCtxtPtr ctxt)
6739{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006741 xmlEntityPtr entity = NULL;
6742 xmlParserInputPtr input;
6743
6744 if (RAW == '%') {
6745 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006746 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006747 if (name == NULL) {
6748 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6749 "xmlParsePEReference: no name\n");
6750 } else {
6751 if (RAW == ';') {
6752 NEXT;
6753 if ((ctxt->sax != NULL) &&
6754 (ctxt->sax->getParameterEntity != NULL))
6755 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6756 name);
6757 if (entity == NULL) {
6758 /*
6759 * [ WFC: Entity Declared ]
6760 * In a document without any DTD, a document with only an
6761 * internal DTD subset which contains no parameter entity
6762 * references, or a document with "standalone='yes'", ...
6763 * ... The declaration of a parameter entity must precede
6764 * any reference to it...
6765 */
6766 if ((ctxt->standalone == 1) ||
6767 ((ctxt->hasExternalSubset == 0) &&
6768 (ctxt->hasPErefs == 0))) {
6769 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6770 "PEReference: %%%s; not found\n",
6771 name);
6772 } else {
6773 /*
6774 * [ VC: Entity Declared ]
6775 * In a document with an external subset or external
6776 * parameter entities with "standalone='no'", ...
6777 * ... The declaration of a parameter entity must
6778 * precede any reference to it...
6779 */
6780 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6781 "PEReference: %%%s; not found\n",
6782 name, NULL);
6783 ctxt->valid = 0;
6784 }
6785 } else {
6786 /*
6787 * Internal checking in case the entity quest barfed
6788 */
6789 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6790 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6791 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6792 "Internal: %%%s; is not a parameter entity\n",
6793 name, NULL);
6794 } else if (ctxt->input->free != deallocblankswrapper) {
6795 input =
6796 xmlNewBlanksWrapperInputStream(ctxt, entity);
6797 xmlPushInput(ctxt, input);
6798 } else {
6799 /*
6800 * TODO !!!
6801 * handle the extra spaces added before and after
6802 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6803 */
6804 input = xmlNewEntityInputStream(ctxt, entity);
6805 xmlPushInput(ctxt, input);
6806 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006807 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006808 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006809 xmlParseTextDecl(ctxt);
6810 if (ctxt->errNo ==
6811 XML_ERR_UNSUPPORTED_ENCODING) {
6812 /*
6813 * The XML REC instructs us to stop parsing
6814 * right here
6815 */
6816 ctxt->instate = XML_PARSER_EOF;
6817 return;
6818 }
6819 }
6820 }
6821 }
6822 ctxt->hasPErefs = 1;
6823 } else {
6824 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6825 }
6826 }
Owen Taylor3473f882001-02-23 17:55:21 +00006827 }
6828}
6829
6830/**
6831 * xmlParseStringPEReference:
6832 * @ctxt: an XML parser context
6833 * @str: a pointer to an index in the string
6834 *
6835 * parse PEReference declarations
6836 *
6837 * [69] PEReference ::= '%' Name ';'
6838 *
6839 * [ WFC: No Recursion ]
6840 * A parsed entity must not contain a recursive
6841 * reference to itself, either directly or indirectly.
6842 *
6843 * [ WFC: Entity Declared ]
6844 * In a document without any DTD, a document with only an internal DTD
6845 * subset which contains no parameter entity references, or a document
6846 * with "standalone='yes'", ... ... The declaration of a parameter
6847 * entity must precede any reference to it...
6848 *
6849 * [ VC: Entity Declared ]
6850 * In a document with an external subset or external parameter entities
6851 * with "standalone='no'", ... ... The declaration of a parameter entity
6852 * must precede any reference to it...
6853 *
6854 * [ WFC: In DTD ]
6855 * Parameter-entity references may only appear in the DTD.
6856 * NOTE: misleading but this is handled.
6857 *
6858 * Returns the string of the entity content.
6859 * str is updated to the current value of the index
6860 */
6861xmlEntityPtr
6862xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6863 const xmlChar *ptr;
6864 xmlChar cur;
6865 xmlChar *name;
6866 xmlEntityPtr entity = NULL;
6867
6868 if ((str == NULL) || (*str == NULL)) return(NULL);
6869 ptr = *str;
6870 cur = *ptr;
6871 if (cur == '%') {
6872 ptr++;
6873 cur = *ptr;
6874 name = xmlParseStringName(ctxt, &ptr);
6875 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006876 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6877 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006878 } else {
6879 cur = *ptr;
6880 if (cur == ';') {
6881 ptr++;
6882 cur = *ptr;
6883 if ((ctxt->sax != NULL) &&
6884 (ctxt->sax->getParameterEntity != NULL))
6885 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6886 name);
6887 if (entity == NULL) {
6888 /*
6889 * [ WFC: Entity Declared ]
6890 * In a document without any DTD, a document with only an
6891 * internal DTD subset which contains no parameter entity
6892 * references, or a document with "standalone='yes'", ...
6893 * ... The declaration of a parameter entity must precede
6894 * any reference to it...
6895 */
6896 if ((ctxt->standalone == 1) ||
6897 ((ctxt->hasExternalSubset == 0) &&
6898 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006899 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006900 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006901 } else {
6902 /*
6903 * [ VC: Entity Declared ]
6904 * In a document with an external subset or external
6905 * parameter entities with "standalone='no'", ...
6906 * ... The declaration of a parameter entity must
6907 * precede any reference to it...
6908 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6910 "PEReference: %%%s; not found\n",
6911 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006912 ctxt->valid = 0;
6913 }
6914 } else {
6915 /*
6916 * Internal checking in case the entity quest barfed
6917 */
6918 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6919 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006920 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6921 "%%%s; is not a parameter entity\n",
6922 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006923 }
6924 }
6925 ctxt->hasPErefs = 1;
6926 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006927 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006928 }
6929 xmlFree(name);
6930 }
6931 }
6932 *str = ptr;
6933 return(entity);
6934}
6935
6936/**
6937 * xmlParseDocTypeDecl:
6938 * @ctxt: an XML parser context
6939 *
6940 * parse a DOCTYPE declaration
6941 *
6942 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6943 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6944 *
6945 * [ VC: Root Element Type ]
6946 * The Name in the document type declaration must match the element
6947 * type of the root element.
6948 */
6949
6950void
6951xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006952 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006953 xmlChar *ExternalID = NULL;
6954 xmlChar *URI = NULL;
6955
6956 /*
6957 * We know that '<!DOCTYPE' has been detected.
6958 */
6959 SKIP(9);
6960
6961 SKIP_BLANKS;
6962
6963 /*
6964 * Parse the DOCTYPE name.
6965 */
6966 name = xmlParseName(ctxt);
6967 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006968 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6969 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006970 }
6971 ctxt->intSubName = name;
6972
6973 SKIP_BLANKS;
6974
6975 /*
6976 * Check for SystemID and ExternalID
6977 */
6978 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6979
6980 if ((URI != NULL) || (ExternalID != NULL)) {
6981 ctxt->hasExternalSubset = 1;
6982 }
6983 ctxt->extSubURI = URI;
6984 ctxt->extSubSystem = ExternalID;
6985
6986 SKIP_BLANKS;
6987
6988 /*
6989 * Create and update the internal subset.
6990 */
6991 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6992 (!ctxt->disableSAX))
6993 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6994
6995 /*
6996 * Is there any internal subset declarations ?
6997 * they are handled separately in xmlParseInternalSubset()
6998 */
6999 if (RAW == '[')
7000 return;
7001
7002 /*
7003 * We should be at the end of the DOCTYPE declaration.
7004 */
7005 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007006 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007007 }
7008 NEXT;
7009}
7010
7011/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007012 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007013 * @ctxt: an XML parser context
7014 *
7015 * parse the internal subset declaration
7016 *
7017 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7018 */
7019
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007020static void
Owen Taylor3473f882001-02-23 17:55:21 +00007021xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7022 /*
7023 * Is there any DTD definition ?
7024 */
7025 if (RAW == '[') {
7026 ctxt->instate = XML_PARSER_DTD;
7027 NEXT;
7028 /*
7029 * Parse the succession of Markup declarations and
7030 * PEReferences.
7031 * Subsequence (markupdecl | PEReference | S)*
7032 */
7033 while (RAW != ']') {
7034 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007035 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007036
7037 SKIP_BLANKS;
7038 xmlParseMarkupDecl(ctxt);
7039 xmlParsePEReference(ctxt);
7040
7041 /*
7042 * Pop-up of finished entities.
7043 */
7044 while ((RAW == 0) && (ctxt->inputNr > 1))
7045 xmlPopInput(ctxt);
7046
7047 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007048 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007049 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007050 break;
7051 }
7052 }
7053 if (RAW == ']') {
7054 NEXT;
7055 SKIP_BLANKS;
7056 }
7057 }
7058
7059 /*
7060 * We should be at the end of the DOCTYPE declaration.
7061 */
7062 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007063 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007064 }
7065 NEXT;
7066}
7067
Daniel Veillard81273902003-09-30 00:43:48 +00007068#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007069/**
7070 * xmlParseAttribute:
7071 * @ctxt: an XML parser context
7072 * @value: a xmlChar ** used to store the value of the attribute
7073 *
7074 * parse an attribute
7075 *
7076 * [41] Attribute ::= Name Eq AttValue
7077 *
7078 * [ WFC: No External Entity References ]
7079 * Attribute values cannot contain direct or indirect entity references
7080 * to external entities.
7081 *
7082 * [ WFC: No < in Attribute Values ]
7083 * The replacement text of any entity referred to directly or indirectly in
7084 * an attribute value (other than "&lt;") must not contain a <.
7085 *
7086 * [ VC: Attribute Value Type ]
7087 * The attribute must have been declared; the value must be of the type
7088 * declared for it.
7089 *
7090 * [25] Eq ::= S? '=' S?
7091 *
7092 * With namespace:
7093 *
7094 * [NS 11] Attribute ::= QName Eq AttValue
7095 *
7096 * Also the case QName == xmlns:??? is handled independently as a namespace
7097 * definition.
7098 *
7099 * Returns the attribute name, and the value in *value.
7100 */
7101
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007102const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007103xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007104 const xmlChar *name;
7105 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007106
7107 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007108 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007109 name = xmlParseName(ctxt);
7110 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007112 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007113 return(NULL);
7114 }
7115
7116 /*
7117 * read the value
7118 */
7119 SKIP_BLANKS;
7120 if (RAW == '=') {
7121 NEXT;
7122 SKIP_BLANKS;
7123 val = xmlParseAttValue(ctxt);
7124 ctxt->instate = XML_PARSER_CONTENT;
7125 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007126 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007127 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 return(NULL);
7129 }
7130
7131 /*
7132 * Check that xml:lang conforms to the specification
7133 * No more registered as an error, just generate a warning now
7134 * since this was deprecated in XML second edition
7135 */
7136 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7137 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007138 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7139 "Malformed value for xml:lang : %s\n",
7140 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007141 }
7142 }
7143
7144 /*
7145 * Check that xml:space conforms to the specification
7146 */
7147 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7148 if (xmlStrEqual(val, BAD_CAST "default"))
7149 *(ctxt->space) = 0;
7150 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7151 *(ctxt->space) = 1;
7152 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007153 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007154"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007155 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007156 }
7157 }
7158
7159 *value = val;
7160 return(name);
7161}
7162
7163/**
7164 * xmlParseStartTag:
7165 * @ctxt: an XML parser context
7166 *
7167 * parse a start of tag either for rule element or
7168 * EmptyElement. In both case we don't parse the tag closing chars.
7169 *
7170 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7171 *
7172 * [ WFC: Unique Att Spec ]
7173 * No attribute name may appear more than once in the same start-tag or
7174 * empty-element tag.
7175 *
7176 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7177 *
7178 * [ WFC: Unique Att Spec ]
7179 * No attribute name may appear more than once in the same start-tag or
7180 * empty-element tag.
7181 *
7182 * With namespace:
7183 *
7184 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7185 *
7186 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7187 *
7188 * Returns the element name parsed
7189 */
7190
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007191const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007192xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007193 const xmlChar *name;
7194 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007195 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007196 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007197 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007198 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007199 int i;
7200
7201 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007202 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007203
7204 name = xmlParseName(ctxt);
7205 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007206 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007207 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007208 return(NULL);
7209 }
7210
7211 /*
7212 * Now parse the attributes, it ends up with the ending
7213 *
7214 * (S Attribute)* S?
7215 */
7216 SKIP_BLANKS;
7217 GROW;
7218
Daniel Veillard21a0f912001-02-25 19:54:14 +00007219 while ((RAW != '>') &&
7220 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007221 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007222 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007223 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007224
7225 attname = xmlParseAttribute(ctxt, &attvalue);
7226 if ((attname != NULL) && (attvalue != NULL)) {
7227 /*
7228 * [ WFC: Unique Att Spec ]
7229 * No attribute name may appear more than once in the same
7230 * start-tag or empty-element tag.
7231 */
7232 for (i = 0; i < nbatts;i += 2) {
7233 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007234 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007235 xmlFree(attvalue);
7236 goto failed;
7237 }
7238 }
Owen Taylor3473f882001-02-23 17:55:21 +00007239 /*
7240 * Add the pair to atts
7241 */
7242 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007243 maxatts = 22; /* allow for 10 attrs by default */
7244 atts = (const xmlChar **)
7245 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007246 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007247 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007248 if (attvalue != NULL)
7249 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007250 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007251 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007252 ctxt->atts = atts;
7253 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007254 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007255 const xmlChar **n;
7256
Owen Taylor3473f882001-02-23 17:55:21 +00007257 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007258 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007259 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007260 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007261 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007262 if (attvalue != NULL)
7263 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007264 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007265 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007266 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007267 ctxt->atts = atts;
7268 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007269 }
7270 atts[nbatts++] = attname;
7271 atts[nbatts++] = attvalue;
7272 atts[nbatts] = NULL;
7273 atts[nbatts + 1] = NULL;
7274 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007275 if (attvalue != NULL)
7276 xmlFree(attvalue);
7277 }
7278
7279failed:
7280
Daniel Veillard3772de32002-12-17 10:31:45 +00007281 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007282 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7283 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007284 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7286 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007287 }
7288 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007289 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7290 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007291 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7292 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007293 break;
7294 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007295 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007296 GROW;
7297 }
7298
7299 /*
7300 * SAX: Start of Element !
7301 */
7302 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007303 (!ctxt->disableSAX)) {
7304 if (nbatts > 0)
7305 ctxt->sax->startElement(ctxt->userData, name, atts);
7306 else
7307 ctxt->sax->startElement(ctxt->userData, name, NULL);
7308 }
Owen Taylor3473f882001-02-23 17:55:21 +00007309
7310 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007311 /* Free only the content strings */
7312 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007313 if (atts[i] != NULL)
7314 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007315 }
7316 return(name);
7317}
7318
7319/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007320 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007321 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007322 * @line: line of the start tag
7323 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007324 *
7325 * parse an end of tag
7326 *
7327 * [42] ETag ::= '</' Name S? '>'
7328 *
7329 * With namespace
7330 *
7331 * [NS 9] ETag ::= '</' QName S? '>'
7332 */
7333
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007334static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007335xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007337
7338 GROW;
7339 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007340 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007341 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007342 return;
7343 }
7344 SKIP(2);
7345
Daniel Veillard46de64e2002-05-29 08:21:33 +00007346 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007347
7348 /*
7349 * We should definitely be at the ending "S? '>'" part
7350 */
7351 GROW;
7352 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007353 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007354 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007355 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007356 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007357
7358 /*
7359 * [ WFC: Element Type Match ]
7360 * The Name in an element's end-tag must match the element type in the
7361 * start-tag.
7362 *
7363 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007364 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007365 if (name == NULL) name = BAD_CAST "unparseable";
7366 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007367 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007368 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007369 }
7370
7371 /*
7372 * SAX: End of Tag
7373 */
7374 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7375 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007376 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007377
Daniel Veillarde57ec792003-09-10 10:50:59 +00007378 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007379 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007380 return;
7381}
7382
7383/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007384 * xmlParseEndTag:
7385 * @ctxt: an XML parser context
7386 *
7387 * parse an end of tag
7388 *
7389 * [42] ETag ::= '</' Name S? '>'
7390 *
7391 * With namespace
7392 *
7393 * [NS 9] ETag ::= '</' QName S? '>'
7394 */
7395
7396void
7397xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 xmlParseEndTag1(ctxt, 0);
7399}
Daniel Veillard81273902003-09-30 00:43:48 +00007400#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007401
7402/************************************************************************
7403 * *
7404 * SAX 2 specific operations *
7405 * *
7406 ************************************************************************/
7407
7408static const xmlChar *
7409xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7410 int len = 0, l;
7411 int c;
7412 int count = 0;
7413
7414 /*
7415 * Handler for more complex cases
7416 */
7417 GROW;
7418 c = CUR_CHAR(l);
7419 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007420 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007421 return(NULL);
7422 }
7423
7424 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007425 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007426 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007427 (IS_COMBINING(c)) ||
7428 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007429 if (count++ > 100) {
7430 count = 0;
7431 GROW;
7432 }
7433 len += l;
7434 NEXTL(l);
7435 c = CUR_CHAR(l);
7436 }
7437 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7438}
7439
7440/*
7441 * xmlGetNamespace:
7442 * @ctxt: an XML parser context
7443 * @prefix: the prefix to lookup
7444 *
7445 * Lookup the namespace name for the @prefix (which ca be NULL)
7446 * The prefix must come from the @ctxt->dict dictionnary
7447 *
7448 * Returns the namespace name or NULL if not bound
7449 */
7450static const xmlChar *
7451xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7452 int i;
7453
Daniel Veillarde57ec792003-09-10 10:50:59 +00007454 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007455 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007456 if (ctxt->nsTab[i] == prefix) {
7457 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7458 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007459 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007460 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 return(NULL);
7462}
7463
7464/**
7465 * xmlParseNCName:
7466 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007467 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007468 *
7469 * parse an XML name.
7470 *
7471 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7472 * CombiningChar | Extender
7473 *
7474 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7475 *
7476 * Returns the Name parsed or NULL
7477 */
7478
7479static const xmlChar *
7480xmlParseNCName(xmlParserCtxtPtr ctxt) {
7481 const xmlChar *in;
7482 const xmlChar *ret;
7483 int count = 0;
7484
7485 /*
7486 * Accelerator for simple ASCII names
7487 */
7488 in = ctxt->input->cur;
7489 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7490 ((*in >= 0x41) && (*in <= 0x5A)) ||
7491 (*in == '_')) {
7492 in++;
7493 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7494 ((*in >= 0x41) && (*in <= 0x5A)) ||
7495 ((*in >= 0x30) && (*in <= 0x39)) ||
7496 (*in == '_') || (*in == '-') ||
7497 (*in == '.'))
7498 in++;
7499 if ((*in > 0) && (*in < 0x80)) {
7500 count = in - ctxt->input->cur;
7501 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7502 ctxt->input->cur = in;
7503 ctxt->nbChars += count;
7504 ctxt->input->col += count;
7505 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007506 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007507 }
7508 return(ret);
7509 }
7510 }
7511 return(xmlParseNCNameComplex(ctxt));
7512}
7513
7514/**
7515 * xmlParseQName:
7516 * @ctxt: an XML parser context
7517 * @prefix: pointer to store the prefix part
7518 *
7519 * parse an XML Namespace QName
7520 *
7521 * [6] QName ::= (Prefix ':')? LocalPart
7522 * [7] Prefix ::= NCName
7523 * [8] LocalPart ::= NCName
7524 *
7525 * Returns the Name parsed or NULL
7526 */
7527
7528static const xmlChar *
7529xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7530 const xmlChar *l, *p;
7531
7532 GROW;
7533
7534 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007535 if (l == NULL) {
7536 if (CUR == ':') {
7537 l = xmlParseName(ctxt);
7538 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007539 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7540 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007541 *prefix = NULL;
7542 return(l);
7543 }
7544 }
7545 return(NULL);
7546 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007547 if (CUR == ':') {
7548 NEXT;
7549 p = l;
7550 l = xmlParseNCName(ctxt);
7551 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007552 xmlChar *tmp;
7553
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007554 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7555 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007556 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7557 p = xmlDictLookup(ctxt->dict, tmp, -1);
7558 if (tmp != NULL) xmlFree(tmp);
7559 *prefix = NULL;
7560 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561 }
7562 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007563 xmlChar *tmp;
7564
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007565 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7566 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007567 NEXT;
7568 tmp = (xmlChar *) xmlParseName(ctxt);
7569 if (tmp != NULL) {
7570 tmp = xmlBuildQName(tmp, l, NULL, 0);
7571 l = xmlDictLookup(ctxt->dict, tmp, -1);
7572 if (tmp != NULL) xmlFree(tmp);
7573 *prefix = p;
7574 return(l);
7575 }
7576 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7577 l = xmlDictLookup(ctxt->dict, tmp, -1);
7578 if (tmp != NULL) xmlFree(tmp);
7579 *prefix = p;
7580 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007581 }
7582 *prefix = p;
7583 } else
7584 *prefix = NULL;
7585 return(l);
7586}
7587
7588/**
7589 * xmlParseQNameAndCompare:
7590 * @ctxt: an XML parser context
7591 * @name: the localname
7592 * @prefix: the prefix, if any.
7593 *
7594 * parse an XML name and compares for match
7595 * (specialized for endtag parsing)
7596 *
7597 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7598 * and the name for mismatch
7599 */
7600
7601static const xmlChar *
7602xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7603 xmlChar const *prefix) {
7604 const xmlChar *cmp = name;
7605 const xmlChar *in;
7606 const xmlChar *ret;
7607 const xmlChar *prefix2;
7608
7609 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7610
7611 GROW;
7612 in = ctxt->input->cur;
7613
7614 cmp = prefix;
7615 while (*in != 0 && *in == *cmp) {
7616 ++in;
7617 ++cmp;
7618 }
7619 if ((*cmp == 0) && (*in == ':')) {
7620 in++;
7621 cmp = name;
7622 while (*in != 0 && *in == *cmp) {
7623 ++in;
7624 ++cmp;
7625 }
William M. Brack76e95df2003-10-18 16:20:14 +00007626 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 /* success */
7628 ctxt->input->cur = in;
7629 return((const xmlChar*) 1);
7630 }
7631 }
7632 /*
7633 * all strings coms from the dictionary, equality can be done directly
7634 */
7635 ret = xmlParseQName (ctxt, &prefix2);
7636 if ((ret == name) && (prefix == prefix2))
7637 return((const xmlChar*) 1);
7638 return ret;
7639}
7640
7641/**
7642 * xmlParseAttValueInternal:
7643 * @ctxt: an XML parser context
7644 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007645 * @alloc: whether the attribute was reallocated as a new string
7646 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007647 *
7648 * parse a value for an attribute.
7649 * NOTE: if no normalization is needed, the routine will return pointers
7650 * directly from the data buffer.
7651 *
7652 * 3.3.3 Attribute-Value Normalization:
7653 * Before the value of an attribute is passed to the application or
7654 * checked for validity, the XML processor must normalize it as follows:
7655 * - a character reference is processed by appending the referenced
7656 * character to the attribute value
7657 * - an entity reference is processed by recursively processing the
7658 * replacement text of the entity
7659 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7660 * appending #x20 to the normalized value, except that only a single
7661 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7662 * parsed entity or the literal entity value of an internal parsed entity
7663 * - other characters are processed by appending them to the normalized value
7664 * If the declared value is not CDATA, then the XML processor must further
7665 * process the normalized attribute value by discarding any leading and
7666 * trailing space (#x20) characters, and by replacing sequences of space
7667 * (#x20) characters by a single space (#x20) character.
7668 * All attributes for which no declaration has been read should be treated
7669 * by a non-validating parser as if declared CDATA.
7670 *
7671 * Returns the AttValue parsed or NULL. The value has to be freed by the
7672 * caller if it was copied, this can be detected by val[*len] == 0.
7673 */
7674
7675static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007676xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7677 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007678{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007680 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007681 xmlChar *ret = NULL;
7682
7683 GROW;
7684 in = (xmlChar *) CUR_PTR;
7685 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007686 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 return (NULL);
7688 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007690
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007691 /*
7692 * try to handle in this routine the most common case where no
7693 * allocation of a new string is required and where content is
7694 * pure ASCII.
7695 */
7696 limit = *in++;
7697 end = ctxt->input->end;
7698 start = in;
7699 if (in >= end) {
7700 const xmlChar *oldbase = ctxt->input->base;
7701 GROW;
7702 if (oldbase != ctxt->input->base) {
7703 long delta = ctxt->input->base - oldbase;
7704 start = start + delta;
7705 in = in + delta;
7706 }
7707 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007709 if (normalize) {
7710 /*
7711 * Skip any leading spaces
7712 */
7713 while ((in < end) && (*in != limit) &&
7714 ((*in == 0x20) || (*in == 0x9) ||
7715 (*in == 0xA) || (*in == 0xD))) {
7716 in++;
7717 start = in;
7718 if (in >= end) {
7719 const xmlChar *oldbase = ctxt->input->base;
7720 GROW;
7721 if (oldbase != ctxt->input->base) {
7722 long delta = ctxt->input->base - oldbase;
7723 start = start + delta;
7724 in = in + delta;
7725 }
7726 end = ctxt->input->end;
7727 }
7728 }
7729 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7730 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7731 if ((*in++ == 0x20) && (*in == 0x20)) break;
7732 if (in >= end) {
7733 const xmlChar *oldbase = ctxt->input->base;
7734 GROW;
7735 if (oldbase != ctxt->input->base) {
7736 long delta = ctxt->input->base - oldbase;
7737 start = start + delta;
7738 in = in + delta;
7739 }
7740 end = ctxt->input->end;
7741 }
7742 }
7743 last = in;
7744 /*
7745 * skip the trailing blanks
7746 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007747 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007748 while ((in < end) && (*in != limit) &&
7749 ((*in == 0x20) || (*in == 0x9) ||
7750 (*in == 0xA) || (*in == 0xD))) {
7751 in++;
7752 if (in >= end) {
7753 const xmlChar *oldbase = ctxt->input->base;
7754 GROW;
7755 if (oldbase != ctxt->input->base) {
7756 long delta = ctxt->input->base - oldbase;
7757 start = start + delta;
7758 in = in + delta;
7759 last = last + delta;
7760 }
7761 end = ctxt->input->end;
7762 }
7763 }
7764 if (*in != limit) goto need_complex;
7765 } else {
7766 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7767 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7768 in++;
7769 if (in >= end) {
7770 const xmlChar *oldbase = ctxt->input->base;
7771 GROW;
7772 if (oldbase != ctxt->input->base) {
7773 long delta = ctxt->input->base - oldbase;
7774 start = start + delta;
7775 in = in + delta;
7776 }
7777 end = ctxt->input->end;
7778 }
7779 }
7780 last = in;
7781 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007783 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007785 *len = last - start;
7786 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007788 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007789 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007790 }
7791 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007792 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007794need_complex:
7795 if (alloc) *alloc = 1;
7796 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007797}
7798
7799/**
7800 * xmlParseAttribute2:
7801 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007802 * @pref: the element prefix
7803 * @elem: the element name
7804 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007805 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007806 * @len: an int * to save the length of the attribute
7807 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 *
7809 * parse an attribute in the new SAX2 framework.
7810 *
7811 * Returns the attribute name, and the value in *value, .
7812 */
7813
7814static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007815xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7816 const xmlChar *pref, const xmlChar *elem,
7817 const xmlChar **prefix, xmlChar **value,
7818 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007819 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007820 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007821 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822
7823 *value = NULL;
7824 GROW;
7825 name = xmlParseQName(ctxt, prefix);
7826 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007827 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7828 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007829 return(NULL);
7830 }
7831
7832 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007833 * get the type if needed
7834 */
7835 if (ctxt->attsSpecial != NULL) {
7836 int type;
7837
7838 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7839 pref, elem, *prefix, name);
7840 if (type != 0) normalize = 1;
7841 }
7842
7843 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007844 * read the value
7845 */
7846 SKIP_BLANKS;
7847 if (RAW == '=') {
7848 NEXT;
7849 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007850 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007851 ctxt->instate = XML_PARSER_CONTENT;
7852 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007853 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007854 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007855 return(NULL);
7856 }
7857
Daniel Veillardd8925572005-06-08 22:34:55 +00007858 if (*prefix == ctxt->str_xml) {
7859 /*
7860 * Check that xml:lang conforms to the specification
7861 * No more registered as an error, just generate a warning now
7862 * since this was deprecated in XML second edition
7863 */
7864 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7865 internal_val = xmlStrndup(val, *len);
7866 if (!xmlCheckLanguageID(internal_val)) {
7867 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7868 "Malformed value for xml:lang : %s\n",
7869 internal_val, NULL);
7870 }
7871 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872
Daniel Veillardd8925572005-06-08 22:34:55 +00007873 /*
7874 * Check that xml:space conforms to the specification
7875 */
7876 if (xmlStrEqual(name, BAD_CAST "space")) {
7877 internal_val = xmlStrndup(val, *len);
7878 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7879 *(ctxt->space) = 0;
7880 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7881 *(ctxt->space) = 1;
7882 else {
7883 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007884"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007885 internal_val, NULL);
7886 }
7887 }
7888 if (internal_val) {
7889 xmlFree(internal_val);
7890 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007891 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007892
7893 *value = val;
7894 return(name);
7895}
7896
7897/**
7898 * xmlParseStartTag2:
7899 * @ctxt: an XML parser context
7900 *
7901 * parse a start of tag either for rule element or
7902 * EmptyElement. In both case we don't parse the tag closing chars.
7903 * This routine is called when running SAX2 parsing
7904 *
7905 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7906 *
7907 * [ WFC: Unique Att Spec ]
7908 * No attribute name may appear more than once in the same start-tag or
7909 * empty-element tag.
7910 *
7911 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7912 *
7913 * [ WFC: Unique Att Spec ]
7914 * No attribute name may appear more than once in the same start-tag or
7915 * empty-element tag.
7916 *
7917 * With namespace:
7918 *
7919 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7920 *
7921 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7922 *
7923 * Returns the element name parsed
7924 */
7925
7926static const xmlChar *
7927xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007928 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 const xmlChar *localname;
7930 const xmlChar *prefix;
7931 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007932 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007933 const xmlChar *nsname;
7934 xmlChar *attvalue;
7935 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007937 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007938 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 const xmlChar *base;
7940 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007941 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942
7943 if (RAW != '<') return(NULL);
7944 NEXT1;
7945
7946 /*
7947 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7948 * point since the attribute values may be stored as pointers to
7949 * the buffer and calling SHRINK would destroy them !
7950 * The Shrinking is only possible once the full set of attribute
7951 * callbacks have been done.
7952 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007953reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007955 base = ctxt->input->base;
7956 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007957 oldline = ctxt->input->line;
7958 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007959 nbatts = 0;
7960 nratts = 0;
7961 nbdef = 0;
7962 nbNs = 0;
7963 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007964 /* Forget any namespaces added during an earlier parse of this element. */
7965 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966
7967 localname = xmlParseQName(ctxt, &prefix);
7968 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007969 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7970 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007971 return(NULL);
7972 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007973 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974
7975 /*
7976 * Now parse the attributes, it ends up with the ending
7977 *
7978 * (S Attribute)* S?
7979 */
7980 SKIP_BLANKS;
7981 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007982 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983
7984 while ((RAW != '>') &&
7985 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007986 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 const xmlChar *q = CUR_PTR;
7988 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007989 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007991 attname = xmlParseAttribute2(ctxt, prefix, localname,
7992 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007993 if (ctxt->input->base != base) {
7994 if ((attvalue != NULL) && (alloc != 0))
7995 xmlFree(attvalue);
7996 attvalue = NULL;
7997 goto base_changed;
7998 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007999 if ((attname != NULL) && (attvalue != NULL)) {
8000 if (len < 0) len = xmlStrlen(attvalue);
8001 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008002 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8003 xmlURIPtr uri;
8004
8005 if (*URL != 0) {
8006 uri = xmlParseURI((const char *) URL);
8007 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008008 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8009 "xmlns: %s not a valid URI\n",
8010 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008011 } else {
8012 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008013 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8014 "xmlns: URI %s is not absolute\n",
8015 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008016 }
8017 xmlFreeURI(uri);
8018 }
8019 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008020 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008021 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008022 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008023 for (j = 1;j <= nbNs;j++)
8024 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8025 break;
8026 if (j <= nbNs)
8027 xmlErrAttributeDup(ctxt, NULL, attname);
8028 else
8029 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008030 if (alloc != 0) xmlFree(attvalue);
8031 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008032 continue;
8033 }
8034 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008035 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8036 xmlURIPtr uri;
8037
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008038 if (attname == ctxt->str_xml) {
8039 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008040 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8041 "xml namespace prefix mapped to wrong URI\n",
8042 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008043 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008044 /*
8045 * Do not keep a namespace definition node
8046 */
8047 if (alloc != 0) xmlFree(attvalue);
8048 SKIP_BLANKS;
8049 continue;
8050 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008051 uri = xmlParseURI((const char *) URL);
8052 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008053 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8054 "xmlns:%s: '%s' is not a valid URI\n",
8055 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008056 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008057 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008058 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8059 "xmlns:%s: URI %s is not absolute\n",
8060 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008061 }
8062 xmlFreeURI(uri);
8063 }
8064
Daniel Veillard0fb18932003-09-07 09:14:37 +00008065 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008066 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008067 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008068 for (j = 1;j <= nbNs;j++)
8069 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8070 break;
8071 if (j <= nbNs)
8072 xmlErrAttributeDup(ctxt, aprefix, attname);
8073 else
8074 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008075 if (alloc != 0) xmlFree(attvalue);
8076 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008077 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078 continue;
8079 }
8080
8081 /*
8082 * Add the pair to atts
8083 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008084 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8085 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008086 if (attvalue[len] == 0)
8087 xmlFree(attvalue);
8088 goto failed;
8089 }
8090 maxatts = ctxt->maxatts;
8091 atts = ctxt->atts;
8092 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008093 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008094 atts[nbatts++] = attname;
8095 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008096 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008097 atts[nbatts++] = attvalue;
8098 attvalue += len;
8099 atts[nbatts++] = attvalue;
8100 /*
8101 * tag if some deallocation is needed
8102 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008103 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008104 } else {
8105 if ((attvalue != NULL) && (attvalue[len] == 0))
8106 xmlFree(attvalue);
8107 }
8108
8109failed:
8110
8111 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008112 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008113 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8114 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008115 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008116 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8117 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008118 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008119 }
8120 SKIP_BLANKS;
8121 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8122 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008123 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008124 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008125 break;
8126 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008127 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008128 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008129 }
8130
Daniel Veillard0fb18932003-09-07 09:14:37 +00008131 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008132 * The attributes defaulting
8133 */
8134 if (ctxt->attsDefault != NULL) {
8135 xmlDefAttrsPtr defaults;
8136
8137 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8138 if (defaults != NULL) {
8139 for (i = 0;i < defaults->nbAttrs;i++) {
8140 attname = defaults->values[4 * i];
8141 aprefix = defaults->values[4 * i + 1];
8142
8143 /*
8144 * special work for namespaces defaulted defs
8145 */
8146 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8147 /*
8148 * check that it's not a defined namespace
8149 */
8150 for (j = 1;j <= nbNs;j++)
8151 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8152 break;
8153 if (j <= nbNs) continue;
8154
8155 nsname = xmlGetNamespace(ctxt, NULL);
8156 if (nsname != defaults->values[4 * i + 2]) {
8157 if (nsPush(ctxt, NULL,
8158 defaults->values[4 * i + 2]) > 0)
8159 nbNs++;
8160 }
8161 } else if (aprefix == ctxt->str_xmlns) {
8162 /*
8163 * check that it's not a defined namespace
8164 */
8165 for (j = 1;j <= nbNs;j++)
8166 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8167 break;
8168 if (j <= nbNs) continue;
8169
8170 nsname = xmlGetNamespace(ctxt, attname);
8171 if (nsname != defaults->values[2]) {
8172 if (nsPush(ctxt, attname,
8173 defaults->values[4 * i + 2]) > 0)
8174 nbNs++;
8175 }
8176 } else {
8177 /*
8178 * check that it's not a defined attribute
8179 */
8180 for (j = 0;j < nbatts;j+=5) {
8181 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8182 break;
8183 }
8184 if (j < nbatts) continue;
8185
8186 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8187 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008188 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008189 }
8190 maxatts = ctxt->maxatts;
8191 atts = ctxt->atts;
8192 }
8193 atts[nbatts++] = attname;
8194 atts[nbatts++] = aprefix;
8195 if (aprefix == NULL)
8196 atts[nbatts++] = NULL;
8197 else
8198 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8199 atts[nbatts++] = defaults->values[4 * i + 2];
8200 atts[nbatts++] = defaults->values[4 * i + 3];
8201 nbdef++;
8202 }
8203 }
8204 }
8205 }
8206
Daniel Veillarde70c8772003-11-25 07:21:18 +00008207 /*
8208 * The attributes checkings
8209 */
8210 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008211 /*
8212 * The default namespace does not apply to attribute names.
8213 */
8214 if (atts[i + 1] != NULL) {
8215 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8216 if (nsname == NULL) {
8217 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8218 "Namespace prefix %s for %s on %s is not defined\n",
8219 atts[i + 1], atts[i], localname);
8220 }
8221 atts[i + 2] = nsname;
8222 } else
8223 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008224 /*
8225 * [ WFC: Unique Att Spec ]
8226 * No attribute name may appear more than once in the same
8227 * start-tag or empty-element tag.
8228 * As extended by the Namespace in XML REC.
8229 */
8230 for (j = 0; j < i;j += 5) {
8231 if (atts[i] == atts[j]) {
8232 if (atts[i+1] == atts[j+1]) {
8233 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8234 break;
8235 }
8236 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8237 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8238 "Namespaced Attribute %s in '%s' redefined\n",
8239 atts[i], nsname, NULL);
8240 break;
8241 }
8242 }
8243 }
8244 }
8245
Daniel Veillarde57ec792003-09-10 10:50:59 +00008246 nsname = xmlGetNamespace(ctxt, prefix);
8247 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008248 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8249 "Namespace prefix %s on %s is not defined\n",
8250 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008251 }
8252 *pref = prefix;
8253 *URI = nsname;
8254
8255 /*
8256 * SAX: Start of Element !
8257 */
8258 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8259 (!ctxt->disableSAX)) {
8260 if (nbNs > 0)
8261 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8262 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8263 nbatts / 5, nbdef, atts);
8264 else
8265 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8266 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8267 }
8268
8269 /*
8270 * Free up attribute allocated strings if needed
8271 */
8272 if (attval != 0) {
8273 for (i = 3,j = 0; j < nratts;i += 5,j++)
8274 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8275 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 }
8277
8278 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008279
8280base_changed:
8281 /*
8282 * the attribute strings are valid iif the base didn't changed
8283 */
8284 if (attval != 0) {
8285 for (i = 3,j = 0; j < nratts;i += 5,j++)
8286 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8287 xmlFree((xmlChar *) atts[i]);
8288 }
8289 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008290 ctxt->input->line = oldline;
8291 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008292 if (ctxt->wellFormed == 1) {
8293 goto reparse;
8294 }
8295 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008296}
8297
8298/**
8299 * xmlParseEndTag2:
8300 * @ctxt: an XML parser context
8301 * @line: line of the start tag
8302 * @nsNr: number of namespaces on the start tag
8303 *
8304 * parse an end of tag
8305 *
8306 * [42] ETag ::= '</' Name S? '>'
8307 *
8308 * With namespace
8309 *
8310 * [NS 9] ETag ::= '</' QName S? '>'
8311 */
8312
8313static void
8314xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008315 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008316 const xmlChar *name;
8317
8318 GROW;
8319 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008320 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008321 return;
8322 }
8323 SKIP(2);
8324
William M. Brack13dfa872004-09-18 04:52:08 +00008325 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008326 if (ctxt->input->cur[tlen] == '>') {
8327 ctxt->input->cur += tlen + 1;
8328 goto done;
8329 }
8330 ctxt->input->cur += tlen;
8331 name = (xmlChar*)1;
8332 } else {
8333 if (prefix == NULL)
8334 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8335 else
8336 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8337 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008338
8339 /*
8340 * We should definitely be at the ending "S? '>'" part
8341 */
8342 GROW;
8343 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008344 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008345 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008346 } else
8347 NEXT1;
8348
8349 /*
8350 * [ WFC: Element Type Match ]
8351 * The Name in an element's end-tag must match the element type in the
8352 * start-tag.
8353 *
8354 */
8355 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008356 if (name == NULL) name = BAD_CAST "unparseable";
8357 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008358 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008359 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008360 }
8361
8362 /*
8363 * SAX: End of Tag
8364 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008365done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008366 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8367 (!ctxt->disableSAX))
8368 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8369
Daniel Veillard0fb18932003-09-07 09:14:37 +00008370 spacePop(ctxt);
8371 if (nsNr != 0)
8372 nsPop(ctxt, nsNr);
8373 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008374}
8375
8376/**
Owen Taylor3473f882001-02-23 17:55:21 +00008377 * xmlParseCDSect:
8378 * @ctxt: an XML parser context
8379 *
8380 * Parse escaped pure raw content.
8381 *
8382 * [18] CDSect ::= CDStart CData CDEnd
8383 *
8384 * [19] CDStart ::= '<![CDATA['
8385 *
8386 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8387 *
8388 * [21] CDEnd ::= ']]>'
8389 */
8390void
8391xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8392 xmlChar *buf = NULL;
8393 int len = 0;
8394 int size = XML_PARSER_BUFFER_SIZE;
8395 int r, rl;
8396 int s, sl;
8397 int cur, l;
8398 int count = 0;
8399
Daniel Veillard8f597c32003-10-06 08:19:27 +00008400 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008401 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008402 SKIP(9);
8403 } else
8404 return;
8405
8406 ctxt->instate = XML_PARSER_CDATA_SECTION;
8407 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008408 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008409 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008410 ctxt->instate = XML_PARSER_CONTENT;
8411 return;
8412 }
8413 NEXTL(rl);
8414 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008415 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008416 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 ctxt->instate = XML_PARSER_CONTENT;
8418 return;
8419 }
8420 NEXTL(sl);
8421 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008422 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008423 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008424 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008425 return;
8426 }
William M. Brack871611b2003-10-18 04:53:14 +00008427 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008428 ((r != ']') || (s != ']') || (cur != '>'))) {
8429 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008430 xmlChar *tmp;
8431
Owen Taylor3473f882001-02-23 17:55:21 +00008432 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008433 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8434 if (tmp == NULL) {
8435 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008436 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008437 return;
8438 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008439 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 }
8441 COPY_BUF(rl,buf,len,r);
8442 r = s;
8443 rl = sl;
8444 s = cur;
8445 sl = l;
8446 count++;
8447 if (count > 50) {
8448 GROW;
8449 count = 0;
8450 }
8451 NEXTL(l);
8452 cur = CUR_CHAR(l);
8453 }
8454 buf[len] = 0;
8455 ctxt->instate = XML_PARSER_CONTENT;
8456 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008457 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008458 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008459 xmlFree(buf);
8460 return;
8461 }
8462 NEXTL(l);
8463
8464 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008465 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008466 */
8467 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8468 if (ctxt->sax->cdataBlock != NULL)
8469 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008470 else if (ctxt->sax->characters != NULL)
8471 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008472 }
8473 xmlFree(buf);
8474}
8475
8476/**
8477 * xmlParseContent:
8478 * @ctxt: an XML parser context
8479 *
8480 * Parse a content:
8481 *
8482 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8483 */
8484
8485void
8486xmlParseContent(xmlParserCtxtPtr ctxt) {
8487 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008488 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008489 ((RAW != '<') || (NXT(1) != '/')) &&
8490 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008491 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008492 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008493 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008494
8495 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008496 * First case : a Processing Instruction.
8497 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008498 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008499 xmlParsePI(ctxt);
8500 }
8501
8502 /*
8503 * Second case : a CDSection
8504 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008505 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008506 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008507 xmlParseCDSect(ctxt);
8508 }
8509
8510 /*
8511 * Third case : a comment
8512 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008513 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008514 (NXT(2) == '-') && (NXT(3) == '-')) {
8515 xmlParseComment(ctxt);
8516 ctxt->instate = XML_PARSER_CONTENT;
8517 }
8518
8519 /*
8520 * Fourth case : a sub-element.
8521 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008522 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008523 xmlParseElement(ctxt);
8524 }
8525
8526 /*
8527 * Fifth case : a reference. If if has not been resolved,
8528 * parsing returns it's Name, create the node
8529 */
8530
Daniel Veillard21a0f912001-02-25 19:54:14 +00008531 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008532 xmlParseReference(ctxt);
8533 }
8534
8535 /*
8536 * Last case, text. Note that References are handled directly.
8537 */
8538 else {
8539 xmlParseCharData(ctxt, 0);
8540 }
8541
8542 GROW;
8543 /*
8544 * Pop-up of finished entities.
8545 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008546 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008547 xmlPopInput(ctxt);
8548 SHRINK;
8549
Daniel Veillardfdc91562002-07-01 21:52:03 +00008550 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008551 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8552 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008553 ctxt->instate = XML_PARSER_EOF;
8554 break;
8555 }
8556 }
8557}
8558
8559/**
8560 * xmlParseElement:
8561 * @ctxt: an XML parser context
8562 *
8563 * parse an XML element, this is highly recursive
8564 *
8565 * [39] element ::= EmptyElemTag | STag content ETag
8566 *
8567 * [ WFC: Element Type Match ]
8568 * The Name in an element's end-tag must match the element type in the
8569 * start-tag.
8570 *
Owen Taylor3473f882001-02-23 17:55:21 +00008571 */
8572
8573void
8574xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008575 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008576 const xmlChar *prefix;
8577 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008578 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008579 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008580 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008581 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008582
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008583 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8584 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8585 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8586 xmlParserMaxDepth);
8587 ctxt->instate = XML_PARSER_EOF;
8588 return;
8589 }
8590
Owen Taylor3473f882001-02-23 17:55:21 +00008591 /* Capture start position */
8592 if (ctxt->record_info) {
8593 node_info.begin_pos = ctxt->input->consumed +
8594 (CUR_PTR - ctxt->input->base);
8595 node_info.begin_line = ctxt->input->line;
8596 }
8597
8598 if (ctxt->spaceNr == 0)
8599 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008600 else if (*ctxt->space == -2)
8601 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008602 else
8603 spacePush(ctxt, *ctxt->space);
8604
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008605 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008606#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008607 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008608#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008609 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008610#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008611 else
8612 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008613#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008614 if (name == NULL) {
8615 spacePop(ctxt);
8616 return;
8617 }
8618 namePush(ctxt, name);
8619 ret = ctxt->node;
8620
Daniel Veillard4432df22003-09-28 18:58:27 +00008621#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008622 /*
8623 * [ VC: Root Element Type ]
8624 * The Name in the document type declaration must match the element
8625 * type of the root element.
8626 */
8627 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8628 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8629 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008630#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008631
8632 /*
8633 * Check for an Empty Element.
8634 */
8635 if ((RAW == '/') && (NXT(1) == '>')) {
8636 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 if (ctxt->sax2) {
8638 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8639 (!ctxt->disableSAX))
8640 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008641#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 } else {
8643 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8644 (!ctxt->disableSAX))
8645 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008646#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008647 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008648 namePop(ctxt);
8649 spacePop(ctxt);
8650 if (nsNr != ctxt->nsNr)
8651 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008652 if ( ret != NULL && ctxt->record_info ) {
8653 node_info.end_pos = ctxt->input->consumed +
8654 (CUR_PTR - ctxt->input->base);
8655 node_info.end_line = ctxt->input->line;
8656 node_info.node = ret;
8657 xmlParserAddNodeInfo(ctxt, &node_info);
8658 }
8659 return;
8660 }
8661 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008662 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008663 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008664 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8665 "Couldn't find end of Start Tag %s line %d\n",
8666 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008667
8668 /*
8669 * end of parsing of this node.
8670 */
8671 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008673 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 if (nsNr != ctxt->nsNr)
8675 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008676
8677 /*
8678 * Capture end position and add node
8679 */
8680 if ( ret != NULL && ctxt->record_info ) {
8681 node_info.end_pos = ctxt->input->consumed +
8682 (CUR_PTR - ctxt->input->base);
8683 node_info.end_line = ctxt->input->line;
8684 node_info.node = ret;
8685 xmlParserAddNodeInfo(ctxt, &node_info);
8686 }
8687 return;
8688 }
8689
8690 /*
8691 * Parse the content of the element:
8692 */
8693 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008694 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008695 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008696 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008697 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008698
8699 /*
8700 * end of parsing of this node.
8701 */
8702 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008703 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008704 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 if (nsNr != ctxt->nsNr)
8706 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008707 return;
8708 }
8709
8710 /*
8711 * parse the end of tag: '</' should be here.
8712 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008713 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008714 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008715 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008716 }
8717#ifdef LIBXML_SAX1_ENABLED
8718 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008719 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008720#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008721
8722 /*
8723 * Capture end position and add node
8724 */
8725 if ( ret != NULL && ctxt->record_info ) {
8726 node_info.end_pos = ctxt->input->consumed +
8727 (CUR_PTR - ctxt->input->base);
8728 node_info.end_line = ctxt->input->line;
8729 node_info.node = ret;
8730 xmlParserAddNodeInfo(ctxt, &node_info);
8731 }
8732}
8733
8734/**
8735 * xmlParseVersionNum:
8736 * @ctxt: an XML parser context
8737 *
8738 * parse the XML version value.
8739 *
8740 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8741 *
8742 * Returns the string giving the XML version number, or NULL
8743 */
8744xmlChar *
8745xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8746 xmlChar *buf = NULL;
8747 int len = 0;
8748 int size = 10;
8749 xmlChar cur;
8750
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008751 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008752 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008753 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008754 return(NULL);
8755 }
8756 cur = CUR;
8757 while (((cur >= 'a') && (cur <= 'z')) ||
8758 ((cur >= 'A') && (cur <= 'Z')) ||
8759 ((cur >= '0') && (cur <= '9')) ||
8760 (cur == '_') || (cur == '.') ||
8761 (cur == ':') || (cur == '-')) {
8762 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008763 xmlChar *tmp;
8764
Owen Taylor3473f882001-02-23 17:55:21 +00008765 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008766 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8767 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008768 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008769 return(NULL);
8770 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008771 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008772 }
8773 buf[len++] = cur;
8774 NEXT;
8775 cur=CUR;
8776 }
8777 buf[len] = 0;
8778 return(buf);
8779}
8780
8781/**
8782 * xmlParseVersionInfo:
8783 * @ctxt: an XML parser context
8784 *
8785 * parse the XML version.
8786 *
8787 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8788 *
8789 * [25] Eq ::= S? '=' S?
8790 *
8791 * Returns the version string, e.g. "1.0"
8792 */
8793
8794xmlChar *
8795xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8796 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008797
Daniel Veillarda07050d2003-10-19 14:46:32 +00008798 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008799 SKIP(7);
8800 SKIP_BLANKS;
8801 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008802 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008803 return(NULL);
8804 }
8805 NEXT;
8806 SKIP_BLANKS;
8807 if (RAW == '"') {
8808 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008809 version = xmlParseVersionNum(ctxt);
8810 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008811 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008812 } else
8813 NEXT;
8814 } else if (RAW == '\''){
8815 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008816 version = xmlParseVersionNum(ctxt);
8817 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008818 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008819 } else
8820 NEXT;
8821 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008822 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008823 }
8824 }
8825 return(version);
8826}
8827
8828/**
8829 * xmlParseEncName:
8830 * @ctxt: an XML parser context
8831 *
8832 * parse the XML encoding name
8833 *
8834 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8835 *
8836 * Returns the encoding name value or NULL
8837 */
8838xmlChar *
8839xmlParseEncName(xmlParserCtxtPtr ctxt) {
8840 xmlChar *buf = NULL;
8841 int len = 0;
8842 int size = 10;
8843 xmlChar cur;
8844
8845 cur = CUR;
8846 if (((cur >= 'a') && (cur <= 'z')) ||
8847 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008848 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008849 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008850 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008851 return(NULL);
8852 }
8853
8854 buf[len++] = cur;
8855 NEXT;
8856 cur = CUR;
8857 while (((cur >= 'a') && (cur <= 'z')) ||
8858 ((cur >= 'A') && (cur <= 'Z')) ||
8859 ((cur >= '0') && (cur <= '9')) ||
8860 (cur == '.') || (cur == '_') ||
8861 (cur == '-')) {
8862 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008863 xmlChar *tmp;
8864
Owen Taylor3473f882001-02-23 17:55:21 +00008865 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008866 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8867 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008869 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008870 return(NULL);
8871 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008872 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008873 }
8874 buf[len++] = cur;
8875 NEXT;
8876 cur = CUR;
8877 if (cur == 0) {
8878 SHRINK;
8879 GROW;
8880 cur = CUR;
8881 }
8882 }
8883 buf[len] = 0;
8884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008885 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008886 }
8887 return(buf);
8888}
8889
8890/**
8891 * xmlParseEncodingDecl:
8892 * @ctxt: an XML parser context
8893 *
8894 * parse the XML encoding declaration
8895 *
8896 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8897 *
8898 * this setups the conversion filters.
8899 *
8900 * Returns the encoding value or NULL
8901 */
8902
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008903const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008904xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8905 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008906
8907 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008908 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008909 SKIP(8);
8910 SKIP_BLANKS;
8911 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008912 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008913 return(NULL);
8914 }
8915 NEXT;
8916 SKIP_BLANKS;
8917 if (RAW == '"') {
8918 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008919 encoding = xmlParseEncName(ctxt);
8920 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008921 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008922 } else
8923 NEXT;
8924 } else if (RAW == '\''){
8925 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008926 encoding = xmlParseEncName(ctxt);
8927 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008928 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008929 } else
8930 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008931 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008932 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008933 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008934 /*
8935 * UTF-16 encoding stwich has already taken place at this stage,
8936 * more over the little-endian/big-endian selection is already done
8937 */
8938 if ((encoding != NULL) &&
8939 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8940 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008941 if (ctxt->encoding != NULL)
8942 xmlFree((xmlChar *) ctxt->encoding);
8943 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008944 }
8945 /*
8946 * UTF-8 encoding is handled natively
8947 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008948 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008949 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8950 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008951 if (ctxt->encoding != NULL)
8952 xmlFree((xmlChar *) ctxt->encoding);
8953 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008954 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008955 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008956 xmlCharEncodingHandlerPtr handler;
8957
8958 if (ctxt->input->encoding != NULL)
8959 xmlFree((xmlChar *) ctxt->input->encoding);
8960 ctxt->input->encoding = encoding;
8961
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008962 handler = xmlFindCharEncodingHandler((const char *) encoding);
8963 if (handler != NULL) {
8964 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008965 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008967 "Unsupported encoding %s\n", encoding);
8968 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008969 }
8970 }
8971 }
8972 return(encoding);
8973}
8974
8975/**
8976 * xmlParseSDDecl:
8977 * @ctxt: an XML parser context
8978 *
8979 * parse the XML standalone declaration
8980 *
8981 * [32] SDDecl ::= S 'standalone' Eq
8982 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8983 *
8984 * [ VC: Standalone Document Declaration ]
8985 * TODO The standalone document declaration must have the value "no"
8986 * if any external markup declarations contain declarations of:
8987 * - attributes with default values, if elements to which these
8988 * attributes apply appear in the document without specifications
8989 * of values for these attributes, or
8990 * - entities (other than amp, lt, gt, apos, quot), if references
8991 * to those entities appear in the document, or
8992 * - attributes with values subject to normalization, where the
8993 * attribute appears in the document with a value which will change
8994 * as a result of normalization, or
8995 * - element types with element content, if white space occurs directly
8996 * within any instance of those types.
8997 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008998 * Returns:
8999 * 1 if standalone="yes"
9000 * 0 if standalone="no"
9001 * -2 if standalone attribute is missing or invalid
9002 * (A standalone value of -2 means that the XML declaration was found,
9003 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009004 */
9005
9006int
9007xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009008 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009009
9010 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009011 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009012 SKIP(10);
9013 SKIP_BLANKS;
9014 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009015 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009016 return(standalone);
9017 }
9018 NEXT;
9019 SKIP_BLANKS;
9020 if (RAW == '\''){
9021 NEXT;
9022 if ((RAW == 'n') && (NXT(1) == 'o')) {
9023 standalone = 0;
9024 SKIP(2);
9025 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9026 (NXT(2) == 's')) {
9027 standalone = 1;
9028 SKIP(3);
9029 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009030 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009031 }
9032 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009033 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009034 } else
9035 NEXT;
9036 } else if (RAW == '"'){
9037 NEXT;
9038 if ((RAW == 'n') && (NXT(1) == 'o')) {
9039 standalone = 0;
9040 SKIP(2);
9041 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9042 (NXT(2) == 's')) {
9043 standalone = 1;
9044 SKIP(3);
9045 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009046 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009047 }
9048 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009049 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009050 } else
9051 NEXT;
9052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009053 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009054 }
9055 }
9056 return(standalone);
9057}
9058
9059/**
9060 * xmlParseXMLDecl:
9061 * @ctxt: an XML parser context
9062 *
9063 * parse an XML declaration header
9064 *
9065 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9066 */
9067
9068void
9069xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9070 xmlChar *version;
9071
9072 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009073 * This value for standalone indicates that the document has an
9074 * XML declaration but it does not have a standalone attribute.
9075 * It will be overwritten later if a standalone attribute is found.
9076 */
9077 ctxt->input->standalone = -2;
9078
9079 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009080 * We know that '<?xml' is here.
9081 */
9082 SKIP(5);
9083
William M. Brack76e95df2003-10-18 16:20:14 +00009084 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9086 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009087 }
9088 SKIP_BLANKS;
9089
9090 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009091 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009092 */
9093 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009094 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009095 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009096 } else {
9097 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9098 /*
9099 * TODO: Blueberry should be detected here
9100 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009101 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9102 "Unsupported version '%s'\n",
9103 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009104 }
9105 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009106 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009107 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009108 }
Owen Taylor3473f882001-02-23 17:55:21 +00009109
9110 /*
9111 * We may have the encoding declaration
9112 */
William M. Brack76e95df2003-10-18 16:20:14 +00009113 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009114 if ((RAW == '?') && (NXT(1) == '>')) {
9115 SKIP(2);
9116 return;
9117 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009118 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009119 }
9120 xmlParseEncodingDecl(ctxt);
9121 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9122 /*
9123 * The XML REC instructs us to stop parsing right here
9124 */
9125 return;
9126 }
9127
9128 /*
9129 * We may have the standalone status.
9130 */
William M. Brack76e95df2003-10-18 16:20:14 +00009131 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009132 if ((RAW == '?') && (NXT(1) == '>')) {
9133 SKIP(2);
9134 return;
9135 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009137 }
9138 SKIP_BLANKS;
9139 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9140
9141 SKIP_BLANKS;
9142 if ((RAW == '?') && (NXT(1) == '>')) {
9143 SKIP(2);
9144 } else if (RAW == '>') {
9145 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009146 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009147 NEXT;
9148 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009149 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009150 MOVETO_ENDTAG(CUR_PTR);
9151 NEXT;
9152 }
9153}
9154
9155/**
9156 * xmlParseMisc:
9157 * @ctxt: an XML parser context
9158 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009159 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009160 *
9161 * [27] Misc ::= Comment | PI | S
9162 */
9163
9164void
9165xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009166 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009167 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009168 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009169 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009170 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009171 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009172 NEXT;
9173 } else
9174 xmlParseComment(ctxt);
9175 }
9176}
9177
9178/**
9179 * xmlParseDocument:
9180 * @ctxt: an XML parser context
9181 *
9182 * parse an XML document (and build a tree if using the standard SAX
9183 * interface).
9184 *
9185 * [1] document ::= prolog element Misc*
9186 *
9187 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9188 *
 * The stages run in this order: encoding detection from the first four
 * bytes, optional XML declaration, startDocument callback, Misc,
 * optional DOCTYPE (internal subset then externalSubset callback) and
 * more Misc, the root element, trailing Misc, endDocument callback.
 *
 * -1 is returned when @ctxt or its input is NULL, when the XML
 * declaration raised XML_ERR_UNSUPPORTED_ENCODING (parsing stops there,
 * no SAX document callback is issued), or when the document turned out
 * not to be well formed.
 *
9189 * Returns 0, -1 in case of error. the parser context is augmented
9190 * as a result of the parsing.
9191 */
9192
9193int
9194xmlParseDocument(xmlParserCtxtPtr ctxt) {
9195 xmlChar start[4];
9196 xmlCharEncoding enc;
9197
9198 xmlInitParser();
9199
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009200 if ((ctxt == NULL) || (ctxt->input == NULL))
9201 return(-1);
9202
Owen Taylor3473f882001-02-23 17:55:21 +00009203 GROW;
9204
9205 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009206 * SAX: detecting the level.
9207 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009208 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009209
9210 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009211 * SAX: beginning of the document processing.
9212 */
9213 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9214 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9215
    /*
     * NOTE(review): ctxt->encoding (a string pointer) is compared with
     * the XML_CHAR_ENCODING_NONE enumerator cast to a pointer;
     * presumably this is meant as "no encoding recorded yet" - confirm.
     * Detection is also skipped when fewer than 4 bytes are available.
     */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009216 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9217 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009218 /*
9219 * Get the 4 first bytes and decode the charset
9220 * if enc != XML_CHAR_ENCODING_NONE
9221 * plug some encoding conversion routines.
9222 */
9223 start[0] = RAW;
9224 start[1] = NXT(1);
9225 start[2] = NXT(2);
9226 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009227 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009228 if (enc != XML_CHAR_ENCODING_NONE) {
9229 xmlSwitchEncoding(ctxt, enc);
9230 }
Owen Taylor3473f882001-02-23 17:55:21 +00009231 }
9232
9233
    /* an empty input is reported, but parsing still goes on below */
9234 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009235 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009236 }
9237
9238 /*
9239 * Check for the XMLDecl in the Prolog.
9240 */
9241 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009242 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009243
9244 /*
9245 * Note that we will switch encoding on the fly.
9246 */
9247 xmlParseXMLDecl(ctxt);
9248 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9249 /*
9250 * The XML REC instructs us to stop parsing right here
9251 */
9252 return(-1);
9253 }
    /* publish the standalone status stored on the input by the XMLDecl parser */
9254 ctxt->standalone = ctxt->input->standalone;
9255 SKIP_BLANKS;
9256 } else {
    /*
     * No XML declaration: fall back to the default version.
     * NOTE(review): a previously set ctxt->version is overwritten
     * here without being freed - confirm it is always NULL at this point.
     */
9257 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9258 }
9259 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9260 ctxt->sax->startDocument(ctxt->userData);
9261
9262 /*
9263 * The Misc part of the Prolog
9264 */
9265 GROW;
9266 xmlParseMisc(ctxt);
9267
9268 /*
9269 * Then possibly doc type declaration(s) and more Misc
9270 * (doctypedecl Misc*)?
9271 */
9272 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009273 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009274
    /* inSubset is 1 while the DOCTYPE and its internal subset are parsed */
9275 ctxt->inSubset = 1;
9276 xmlParseDocTypeDecl(ctxt);
9277 if (RAW == '[') {
9278 ctxt->instate = XML_PARSER_DTD;
9279 xmlParseInternalSubset(ctxt);
9280 }
9281
9282 /*
9283 * Create and update the external subset.
9284 */
    /* inSubset is 2 for the duration of the externalSubset callback, then reset to 0 */
9285 ctxt->inSubset = 2;
9286 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9287 (!ctxt->disableSAX))
9288 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9289 ctxt->extSubSystem, ctxt->extSubURI);
9290 ctxt->inSubset = 0;
9291
Daniel Veillardac4118d2008-01-11 05:27:32 +00009292 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009293
9294 ctxt->instate = XML_PARSER_PROLOG;
9295 xmlParseMisc(ctxt);
9296 }
9297
9298 /*
9299 * Time to start parsing the tree itself
9300 */
9301 GROW;
9302 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009303 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9304 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009305 } else {
9306 ctxt->instate = XML_PARSER_CONTENT;
9307 xmlParseElement(ctxt);
9308 ctxt->instate = XML_PARSER_EPILOG;
9309
9310
9311 /*
9312 * The Misc part at the end
9313 */
9314 xmlParseMisc(ctxt);
9315
    /* anything left after the trailing Misc is extra content */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009316 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009317 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009318 }
9319 ctxt->instate = XML_PARSER_EOF;
9320 }
9321
9322 /*
9323 * SAX: end of the document processing.
9324 */
    /* unlike startDocument above, this callback is not guarded by disableSAX */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009325 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009326 ctxt->sax->endDocument(ctxt->userData);
9327
Daniel Veillard5997aca2002-03-18 18:36:20 +00009328 /*
9329 * Remove locally kept entity definitions if the tree was not built
9330 */
9331 if ((ctxt->myDoc != NULL) &&
9332 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9333 xmlFreeDoc(ctxt->myDoc);
9334 ctxt->myDoc = NULL;
9335 }
9336
    /* a document that is not well formed is also flagged as not valid */
Daniel Veillardc7612992002-02-17 22:47:37 +00009337 if (! ctxt->wellFormed) {
9338 ctxt->valid = 0;
9339 return(-1);
9340 }
Owen Taylor3473f882001-02-23 17:55:21 +00009341 return(0);
9342}
9343
9344/**
9345 * xmlParseExtParsedEnt:
9346 * @ctxt: an XML parser context
9347 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009348 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009349 * An external general parsed entity is well-formed if it matches the
9350 * production labeled extParsedEnt.
9351 *
9352 * [78] extParsedEnt ::= TextDecl? content
9353 *
 * The optional leading declaration is parsed with xmlParseXMLDecl().
 * Validation and subset loading are switched off on the context before
 * the content is parsed.
 *
 * -1 is returned when @ctxt or its input is NULL, when the declaration
 * raised XML_ERR_UNSUPPORTED_ENCODING, or when the entity is not well
 * formed.
 *
9354 * Returns 0, -1 in case of error. the parser context is augmented
9355 * as a result of the parsing.
9356 */
9357
9358int
9359xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9360 xmlChar start[4];
9361 xmlCharEncoding enc;
9362
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009363 if ((ctxt == NULL) || (ctxt->input == NULL))
9364 return(-1);
9365
Owen Taylor3473f882001-02-23 17:55:21 +00009366 xmlDefaultSAXHandlerInit();
9367
Daniel Veillard309f81d2003-09-23 09:02:53 +00009368 xmlDetectSAX2(ctxt);
9369
Owen Taylor3473f882001-02-23 17:55:21 +00009370 GROW;
9371
9372 /*
9373 * SAX: beginning of the document processing.
9374 */
9375 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9376 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9377
9378 /*
9379 * Get the 4 first bytes and decode the charset
9380 * if enc != XML_CHAR_ENCODING_NONE
9381 * plug some encoding conversion routines.
9382 */
    /* detection only needs 4 available bytes; ctxt->encoding is not consulted here */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009383 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9384 start[0] = RAW;
9385 start[1] = NXT(1);
9386 start[2] = NXT(2);
9387 start[3] = NXT(3);
9388 enc = xmlDetectCharEncoding(start, 4);
9389 if (enc != XML_CHAR_ENCODING_NONE) {
9390 xmlSwitchEncoding(ctxt, enc);
9391 }
Owen Taylor3473f882001-02-23 17:55:21 +00009392 }
9393
9394
    /* an empty input is reported, but parsing still goes on below */
9395 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009396 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009397 }
9398
9399 /*
9400 * Check for the XMLDecl in the Prolog.
9401 */
9402 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009403 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009404
9405 /*
9406 * Note that we will switch encoding on the fly.
9407 */
9408 xmlParseXMLDecl(ctxt);
9409 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9410 /*
9411 * The XML REC instructs us to stop parsing right here
9412 */
9413 return(-1);
9414 }
9415 SKIP_BLANKS;
9416 } else {
    /*
     * No declaration: fall back to the default version.
     * NOTE(review): a previously set ctxt->version is overwritten
     * here without being freed - confirm it is always NULL at this point.
     */
9417 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9418 }
9419 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9420 ctxt->sax->startDocument(ctxt->userData);
9421
9422 /*
9423 * Doing validity checking on chunk doesn't make sense
9424 */
9425 ctxt->instate = XML_PARSER_CONTENT;
9426 ctxt->validate = 0;
9427 ctxt->loadsubset = 0;
9428 ctxt->depth = 0;
9429
9430 xmlParseContent(ctxt);
9431
    /*
     * Content parsing stopped before the end of input: a pending '</'
     * means an unbalanced end tag, anything else is extra content.
     */
9432 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009433 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009434 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009435 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009436 }
9437
9438 /*
9439 * SAX: end of the document processing.
9440 */
    /* unlike startDocument above, this callback is not guarded by disableSAX */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009441 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009442 ctxt->sax->endDocument(ctxt->userData);
9443
9444 if (! ctxt->wellFormed) return(-1);
9445 return(0);
9446}
9447
Daniel Veillard73b013f2003-09-30 12:36:01 +00009448#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009449/************************************************************************
9450 * *
9451 * Progressive parsing interfaces *
9452 * *
9453 ************************************************************************/
9454
9455/**
9456 * xmlParseLookupSequence:
9457 * @ctxt: an XML parser context
9458 * @first: the first char to lookup
9459 * @next: the next char to lookup or zero
9460 * @third: the next char to lookup or zero
9461 *
9462 * Try to find if a sequence (first, next, third) or just (first next) or
9463 * (first) is available in the input stream.
9464 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9465 * to avoid rescanning sequences of bytes, it DOES change the state of the
9466 * parser, do not use liberally.
9467 *
9468 * Returns the index to the current parsing point if the full sequence
9469 * is available, -1 otherwise.
9470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009471static int
Owen Taylor3473f882001-02-23 17:55:21 +00009472xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9473 xmlChar next, xmlChar third) {
9474 int base, len;
9475 xmlParserInputPtr in;
9476 const xmlChar *buf;
9477
9478 in = ctxt->input;
9479 if (in == NULL) return(-1);
9480 base = in->cur - in->base;
9481 if (base < 0) return(-1);
9482 if (ctxt->checkIndex > base)
9483 base = ctxt->checkIndex;
9484 if (in->buf == NULL) {
9485 buf = in->base;
9486 len = in->length;
9487 } else {
9488 buf = in->buf->buffer->content;
9489 len = in->buf->buffer->use;
9490 }
9491 /* take into account the sequence length */
9492 if (third) len -= 2;
9493 else if (next) len --;
9494 for (;base < len;base++) {
9495 if (buf[base] == first) {
9496 if (third != 0) {
9497 if ((buf[base + 1] != next) ||
9498 (buf[base + 2] != third)) continue;
9499 } else if (next != 0) {
9500 if (buf[base + 1] != next) continue;
9501 }
9502 ctxt->checkIndex = 0;
9503#ifdef DEBUG_PUSH
9504 if (next == 0)
9505 xmlGenericError(xmlGenericErrorContext,
9506 "PP: lookup '%c' found at %d\n",
9507 first, base);
9508 else if (third == 0)
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: lookup '%c%c' found at %d\n",
9511 first, next, base);
9512 else
9513 xmlGenericError(xmlGenericErrorContext,
9514 "PP: lookup '%c%c%c' found at %d\n",
9515 first, next, third, base);
9516#endif
9517 return(base - (in->cur - in->base));
9518 }
9519 }
9520 ctxt->checkIndex = base;
9521#ifdef DEBUG_PUSH
9522 if (next == 0)
9523 xmlGenericError(xmlGenericErrorContext,
9524 "PP: lookup '%c' failed\n", first);
9525 else if (third == 0)
9526 xmlGenericError(xmlGenericErrorContext,
9527 "PP: lookup '%c%c' failed\n", first, next);
9528 else
9529 xmlGenericError(xmlGenericErrorContext,
9530 "PP: lookup '%c%c%c' failed\n", first, next, third);
9531#endif
9532 return(-1);
9533}
9534
9535/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009536 * xmlParseGetLasts:
9537 * @ctxt: an XML parser context
9538 * @lastlt: pointer to store the last '<' from the input
9539 * @lastgt: pointer to store the last '>' from the input
9540 *
9541 * Lookup the last < and > in the current chunk
9542 */
9543static void
9544xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9545 const xmlChar **lastgt) {
9546 const xmlChar *tmp;
9547
9548 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9549 xmlGenericError(xmlGenericErrorContext,
9550 "Internal error: xmlParseGetLasts\n");
9551 return;
9552 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009553 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 tmp = ctxt->input->end;
9555 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009556 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009557 if (tmp < ctxt->input->base) {
9558 *lastlt = NULL;
9559 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009560 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009561 *lastlt = tmp;
9562 tmp++;
9563 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9564 if (*tmp == '\'') {
9565 tmp++;
9566 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9567 if (tmp < ctxt->input->end) tmp++;
9568 } else if (*tmp == '"') {
9569 tmp++;
9570 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9571 if (tmp < ctxt->input->end) tmp++;
9572 } else
9573 tmp++;
9574 }
9575 if (tmp < ctxt->input->end)
9576 *lastgt = tmp;
9577 else {
9578 tmp = *lastlt;
9579 tmp--;
9580 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9581 if (tmp >= ctxt->input->base)
9582 *lastgt = tmp;
9583 else
9584 *lastgt = NULL;
9585 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009586 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 } else {
9588 *lastlt = NULL;
9589 *lastgt = NULL;
9590 }
9591}
9592/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009593 * xmlCheckCdataPush:
9594 * @cur: pointer to the bock of characters
9595 * @len: length of the block in bytes
9596 *
9597 * Check that the block of characters is okay as SCdata content [20]
9598 *
9599 * Returns the number of bytes to pass if okay, a negative index where an
9600 * UTF-8 error occured otherwise
9601 */
9602static int
9603xmlCheckCdataPush(const xmlChar *utf, int len) {
9604 int ix;
9605 unsigned char c;
9606 int codepoint;
9607
9608 if ((utf == NULL) || (len <= 0))
9609 return(0);
9610
9611 for (ix = 0; ix < len;) { /* string is 0-terminated */
9612 c = utf[ix];
9613 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9614 if (c >= 0x20)
9615 ix++;
9616 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9617 ix++;
9618 else
9619 return(-ix);
9620 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9621 if (ix + 2 > len) return(ix);
9622 if ((utf[ix+1] & 0xc0 ) != 0x80)
9623 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009624 codepoint = (utf[ix] & 0x1f) << 6;
9625 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009626 if (!xmlIsCharQ(codepoint))
9627 return(-ix);
9628 ix += 2;
9629 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9630 if (ix + 3 > len) return(ix);
9631 if (((utf[ix+1] & 0xc0) != 0x80) ||
9632 ((utf[ix+2] & 0xc0) != 0x80))
9633 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009634 codepoint = (utf[ix] & 0xf) << 12;
9635 codepoint |= (utf[ix+1] & 0x3f) << 6;
9636 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009637 if (!xmlIsCharQ(codepoint))
9638 return(-ix);
9639 ix += 3;
9640 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9641 if (ix + 4 > len) return(ix);
9642 if (((utf[ix+1] & 0xc0) != 0x80) ||
9643 ((utf[ix+2] & 0xc0) != 0x80) ||
9644 ((utf[ix+3] & 0xc0) != 0x80))
9645 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009646 codepoint = (utf[ix] & 0x7) << 18;
9647 codepoint |= (utf[ix+1] & 0x3f) << 12;
9648 codepoint |= (utf[ix+2] & 0x3f) << 6;
9649 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009650 if (!xmlIsCharQ(codepoint))
9651 return(-ix);
9652 ix += 4;
9653 } else /* unknown encoding */
9654 return(-ix);
9655 }
9656 return(ix);
9657}
9658
9659/**
Owen Taylor3473f882001-02-23 17:55:21 +00009660 * xmlParseTryOrFinish:
9661 * @ctxt: an XML parser context
9662 * @terminate: last chunk indicator
9663 *
9664 * Try to progress on parsing
9665 *
9666 * Returns zero if no parsing was possible
9667 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009668static int
Owen Taylor3473f882001-02-23 17:55:21 +00009669xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9670 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009671 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009672 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009673 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009674
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009675 if (ctxt->input == NULL)
9676 return(0);
9677
Owen Taylor3473f882001-02-23 17:55:21 +00009678#ifdef DEBUG_PUSH
9679 switch (ctxt->instate) {
9680 case XML_PARSER_EOF:
9681 xmlGenericError(xmlGenericErrorContext,
9682 "PP: try EOF\n"); break;
9683 case XML_PARSER_START:
9684 xmlGenericError(xmlGenericErrorContext,
9685 "PP: try START\n"); break;
9686 case XML_PARSER_MISC:
9687 xmlGenericError(xmlGenericErrorContext,
9688 "PP: try MISC\n");break;
9689 case XML_PARSER_COMMENT:
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: try COMMENT\n");break;
9692 case XML_PARSER_PROLOG:
9693 xmlGenericError(xmlGenericErrorContext,
9694 "PP: try PROLOG\n");break;
9695 case XML_PARSER_START_TAG:
9696 xmlGenericError(xmlGenericErrorContext,
9697 "PP: try START_TAG\n");break;
9698 case XML_PARSER_CONTENT:
9699 xmlGenericError(xmlGenericErrorContext,
9700 "PP: try CONTENT\n");break;
9701 case XML_PARSER_CDATA_SECTION:
9702 xmlGenericError(xmlGenericErrorContext,
9703 "PP: try CDATA_SECTION\n");break;
9704 case XML_PARSER_END_TAG:
9705 xmlGenericError(xmlGenericErrorContext,
9706 "PP: try END_TAG\n");break;
9707 case XML_PARSER_ENTITY_DECL:
9708 xmlGenericError(xmlGenericErrorContext,
9709 "PP: try ENTITY_DECL\n");break;
9710 case XML_PARSER_ENTITY_VALUE:
9711 xmlGenericError(xmlGenericErrorContext,
9712 "PP: try ENTITY_VALUE\n");break;
9713 case XML_PARSER_ATTRIBUTE_VALUE:
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: try ATTRIBUTE_VALUE\n");break;
9716 case XML_PARSER_DTD:
9717 xmlGenericError(xmlGenericErrorContext,
9718 "PP: try DTD\n");break;
9719 case XML_PARSER_EPILOG:
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: try EPILOG\n");break;
9722 case XML_PARSER_PI:
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: try PI\n");break;
9725 case XML_PARSER_IGNORE:
9726 xmlGenericError(xmlGenericErrorContext,
9727 "PP: try IGNORE\n");break;
9728 }
9729#endif
9730
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009731 if ((ctxt->input != NULL) &&
9732 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009733 xmlSHRINK(ctxt);
9734 ctxt->checkIndex = 0;
9735 }
9736 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009737
Daniel Veillarda880b122003-04-21 21:36:41 +00009738 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009739 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009740 return(0);
9741
9742
Owen Taylor3473f882001-02-23 17:55:21 +00009743 /*
9744 * Pop-up of finished entities.
9745 */
9746 while ((RAW == 0) && (ctxt->inputNr > 1))
9747 xmlPopInput(ctxt);
9748
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009749 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009750 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 avail = ctxt->input->length -
9752 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009753 else {
9754 /*
9755 * If we are operating on converted input, try to flush
9756 * remainng chars to avoid them stalling in the non-converted
9757 * buffer.
9758 */
9759 if ((ctxt->input->buf->raw != NULL) &&
9760 (ctxt->input->buf->raw->use > 0)) {
9761 int base = ctxt->input->base -
9762 ctxt->input->buf->buffer->content;
9763 int current = ctxt->input->cur - ctxt->input->base;
9764
9765 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9766 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9767 ctxt->input->cur = ctxt->input->base + current;
9768 ctxt->input->end =
9769 &ctxt->input->buf->buffer->content[
9770 ctxt->input->buf->buffer->use];
9771 }
9772 avail = ctxt->input->buf->buffer->use -
9773 (ctxt->input->cur - ctxt->input->base);
9774 }
Owen Taylor3473f882001-02-23 17:55:21 +00009775 if (avail < 1)
9776 goto done;
9777 switch (ctxt->instate) {
9778 case XML_PARSER_EOF:
9779 /*
9780 * Document parsing is done !
9781 */
9782 goto done;
9783 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009784 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9785 xmlChar start[4];
9786 xmlCharEncoding enc;
9787
9788 /*
9789 * Very first chars read from the document flow.
9790 */
9791 if (avail < 4)
9792 goto done;
9793
9794 /*
9795 * Get the 4 first bytes and decode the charset
9796 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009797 * plug some encoding conversion routines,
9798 * else xmlSwitchEncoding will set to (default)
9799 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009800 */
9801 start[0] = RAW;
9802 start[1] = NXT(1);
9803 start[2] = NXT(2);
9804 start[3] = NXT(3);
9805 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009806 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009807 break;
9808 }
Owen Taylor3473f882001-02-23 17:55:21 +00009809
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009810 if (avail < 2)
9811 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009812 cur = ctxt->input->cur[0];
9813 next = ctxt->input->cur[1];
9814 if (cur == 0) {
9815 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9816 ctxt->sax->setDocumentLocator(ctxt->userData,
9817 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009818 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009819 ctxt->instate = XML_PARSER_EOF;
9820#ifdef DEBUG_PUSH
9821 xmlGenericError(xmlGenericErrorContext,
9822 "PP: entering EOF\n");
9823#endif
9824 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9825 ctxt->sax->endDocument(ctxt->userData);
9826 goto done;
9827 }
9828 if ((cur == '<') && (next == '?')) {
9829 /* PI or XML decl */
9830 if (avail < 5) return(ret);
9831 if ((!terminate) &&
9832 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9833 return(ret);
9834 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9835 ctxt->sax->setDocumentLocator(ctxt->userData,
9836 &xmlDefaultSAXLocator);
9837 if ((ctxt->input->cur[2] == 'x') &&
9838 (ctxt->input->cur[3] == 'm') &&
9839 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009840 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009841 ret += 5;
9842#ifdef DEBUG_PUSH
9843 xmlGenericError(xmlGenericErrorContext,
9844 "PP: Parsing XML Decl\n");
9845#endif
9846 xmlParseXMLDecl(ctxt);
9847 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9848 /*
9849 * The XML REC instructs us to stop parsing right
9850 * here
9851 */
9852 ctxt->instate = XML_PARSER_EOF;
9853 return(0);
9854 }
9855 ctxt->standalone = ctxt->input->standalone;
9856 if ((ctxt->encoding == NULL) &&
9857 (ctxt->input->encoding != NULL))
9858 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9859 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9860 (!ctxt->disableSAX))
9861 ctxt->sax->startDocument(ctxt->userData);
9862 ctxt->instate = XML_PARSER_MISC;
9863#ifdef DEBUG_PUSH
9864 xmlGenericError(xmlGenericErrorContext,
9865 "PP: entering MISC\n");
9866#endif
9867 } else {
9868 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9869 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9870 (!ctxt->disableSAX))
9871 ctxt->sax->startDocument(ctxt->userData);
9872 ctxt->instate = XML_PARSER_MISC;
9873#ifdef DEBUG_PUSH
9874 xmlGenericError(xmlGenericErrorContext,
9875 "PP: entering MISC\n");
9876#endif
9877 }
9878 } else {
9879 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9880 ctxt->sax->setDocumentLocator(ctxt->userData,
9881 &xmlDefaultSAXLocator);
9882 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009883 if (ctxt->version == NULL) {
9884 xmlErrMemory(ctxt, NULL);
9885 break;
9886 }
Owen Taylor3473f882001-02-23 17:55:21 +00009887 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9888 (!ctxt->disableSAX))
9889 ctxt->sax->startDocument(ctxt->userData);
9890 ctxt->instate = XML_PARSER_MISC;
9891#ifdef DEBUG_PUSH
9892 xmlGenericError(xmlGenericErrorContext,
9893 "PP: entering MISC\n");
9894#endif
9895 }
9896 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009897 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009898 const xmlChar *name;
9899 const xmlChar *prefix;
9900 const xmlChar *URI;
9901 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009902
9903 if ((avail < 2) && (ctxt->inputNr == 1))
9904 goto done;
9905 cur = ctxt->input->cur[0];
9906 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009907 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009908 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009909 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9910 ctxt->sax->endDocument(ctxt->userData);
9911 goto done;
9912 }
9913 if (!terminate) {
9914 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009915 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009916 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009917 goto done;
9918 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9919 goto done;
9920 }
9921 }
9922 if (ctxt->spaceNr == 0)
9923 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009924 else if (*ctxt->space == -2)
9925 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009926 else
9927 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009928#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009929 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009930#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009931 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009932#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009933 else
9934 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009935#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009936 if (name == NULL) {
9937 spacePop(ctxt);
9938 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009939 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9940 ctxt->sax->endDocument(ctxt->userData);
9941 goto done;
9942 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009943#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009944 /*
9945 * [ VC: Root Element Type ]
9946 * The Name in the document type declaration must match
9947 * the element type of the root element.
9948 */
9949 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9950 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9951 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009952#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009953
9954 /*
9955 * Check for an Empty Element.
9956 */
9957 if ((RAW == '/') && (NXT(1) == '>')) {
9958 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009959
9960 if (ctxt->sax2) {
9961 if ((ctxt->sax != NULL) &&
9962 (ctxt->sax->endElementNs != NULL) &&
9963 (!ctxt->disableSAX))
9964 ctxt->sax->endElementNs(ctxt->userData, name,
9965 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009966 if (ctxt->nsNr - nsNr > 0)
9967 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009968#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009969 } else {
9970 if ((ctxt->sax != NULL) &&
9971 (ctxt->sax->endElement != NULL) &&
9972 (!ctxt->disableSAX))
9973 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009974#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009976 spacePop(ctxt);
9977 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009978 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009979 } else {
9980 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009981 }
9982 break;
9983 }
9984 if (RAW == '>') {
9985 NEXT;
9986 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009987 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009988 "Couldn't find end of Start Tag %s\n",
9989 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009990 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009991 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009993 if (ctxt->sax2)
9994 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009995#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009996 else
9997 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009998#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009999
Daniel Veillarda880b122003-04-21 21:36:41 +000010000 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010001 break;
10002 }
10003 case XML_PARSER_CONTENT: {
10004 const xmlChar *test;
10005 unsigned int cons;
10006 if ((avail < 2) && (ctxt->inputNr == 1))
10007 goto done;
10008 cur = ctxt->input->cur[0];
10009 next = ctxt->input->cur[1];
10010
10011 test = CUR_PTR;
10012 cons = ctxt->input->consumed;
10013 if ((cur == '<') && (next == '/')) {
10014 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010015 break;
10016 } else if ((cur == '<') && (next == '?')) {
10017 if ((!terminate) &&
10018 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10019 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010020 xmlParsePI(ctxt);
10021 } else if ((cur == '<') && (next != '!')) {
10022 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010023 break;
10024 } else if ((cur == '<') && (next == '!') &&
10025 (ctxt->input->cur[2] == '-') &&
10026 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010027 int term;
10028
10029 if (avail < 4)
10030 goto done;
10031 ctxt->input->cur += 4;
10032 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10033 ctxt->input->cur -= 4;
10034 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010035 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010036 xmlParseComment(ctxt);
10037 ctxt->instate = XML_PARSER_CONTENT;
10038 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10039 (ctxt->input->cur[2] == '[') &&
10040 (ctxt->input->cur[3] == 'C') &&
10041 (ctxt->input->cur[4] == 'D') &&
10042 (ctxt->input->cur[5] == 'A') &&
10043 (ctxt->input->cur[6] == 'T') &&
10044 (ctxt->input->cur[7] == 'A') &&
10045 (ctxt->input->cur[8] == '[')) {
10046 SKIP(9);
10047 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010048 break;
10049 } else if ((cur == '<') && (next == '!') &&
10050 (avail < 9)) {
10051 goto done;
10052 } else if (cur == '&') {
10053 if ((!terminate) &&
10054 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10055 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010056 xmlParseReference(ctxt);
10057 } else {
10058 /* TODO Avoid the extra copy, handle directly !!! */
10059 /*
10060 * Goal of the following test is:
10061 * - minimize calls to the SAX 'character' callback
10062 * when they are mergeable
10063 * - handle an problem for isBlank when we only parse
10064 * a sequence of blank chars and the next one is
10065 * not available to check against '<' presence.
10066 * - tries to homogenize the differences in SAX
10067 * callbacks between the push and pull versions
10068 * of the parser.
10069 */
10070 if ((ctxt->inputNr == 1) &&
10071 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10072 if (!terminate) {
10073 if (ctxt->progressive) {
10074 if ((lastlt == NULL) ||
10075 (ctxt->input->cur > lastlt))
10076 goto done;
10077 } else if (xmlParseLookupSequence(ctxt,
10078 '<', 0, 0) < 0) {
10079 goto done;
10080 }
10081 }
10082 }
10083 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010084 xmlParseCharData(ctxt, 0);
10085 }
10086 /*
10087 * Pop-up of finished entities.
10088 */
10089 while ((RAW == 0) && (ctxt->inputNr > 1))
10090 xmlPopInput(ctxt);
10091 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010092 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10093 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010094 ctxt->instate = XML_PARSER_EOF;
10095 break;
10096 }
10097 break;
10098 }
10099 case XML_PARSER_END_TAG:
10100 if (avail < 2)
10101 goto done;
10102 if (!terminate) {
10103 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010104 /* > can be found unescaped in attribute values */
10105 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010106 goto done;
10107 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10108 goto done;
10109 }
10110 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010111 if (ctxt->sax2) {
10112 xmlParseEndTag2(ctxt,
10113 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10114 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010115 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010116 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010117 }
10118#ifdef LIBXML_SAX1_ENABLED
10119 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010120 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010121#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010122 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010123 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010124 } else {
10125 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010126 }
10127 break;
10128 case XML_PARSER_CDATA_SECTION: {
10129 /*
10130 * The Push mode need to have the SAX callback for
10131 * cdataBlock merge back contiguous callbacks.
10132 */
10133 int base;
10134
10135 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10136 if (base < 0) {
10137 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010138 int tmp;
10139
10140 tmp = xmlCheckCdataPush(ctxt->input->cur,
10141 XML_PARSER_BIG_BUFFER_SIZE);
10142 if (tmp < 0) {
10143 tmp = -tmp;
10144 ctxt->input->cur += tmp;
10145 goto encoding_error;
10146 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010147 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10148 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010149 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010150 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010151 else if (ctxt->sax->characters != NULL)
10152 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010153 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010154 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010155 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010156 ctxt->checkIndex = 0;
10157 }
10158 goto done;
10159 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010160 int tmp;
10161
10162 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10163 if ((tmp < 0) || (tmp != base)) {
10164 tmp = -tmp;
10165 ctxt->input->cur += tmp;
10166 goto encoding_error;
10167 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010168 if ((ctxt->sax != NULL) && (base > 0) &&
10169 (!ctxt->disableSAX)) {
10170 if (ctxt->sax->cdataBlock != NULL)
10171 ctxt->sax->cdataBlock(ctxt->userData,
10172 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010173 else if (ctxt->sax->characters != NULL)
10174 ctxt->sax->characters(ctxt->userData,
10175 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010176 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010177 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010178 ctxt->checkIndex = 0;
10179 ctxt->instate = XML_PARSER_CONTENT;
10180#ifdef DEBUG_PUSH
10181 xmlGenericError(xmlGenericErrorContext,
10182 "PP: entering CONTENT\n");
10183#endif
10184 }
10185 break;
10186 }
Owen Taylor3473f882001-02-23 17:55:21 +000010187 case XML_PARSER_MISC:
10188 SKIP_BLANKS;
10189 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010190 avail = ctxt->input->length -
10191 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010192 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010193 avail = ctxt->input->buf->buffer->use -
10194 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010195 if (avail < 2)
10196 goto done;
10197 cur = ctxt->input->cur[0];
10198 next = ctxt->input->cur[1];
10199 if ((cur == '<') && (next == '?')) {
10200 if ((!terminate) &&
10201 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10202 goto done;
10203#ifdef DEBUG_PUSH
10204 xmlGenericError(xmlGenericErrorContext,
10205 "PP: Parsing PI\n");
10206#endif
10207 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010208 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010209 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010210 (ctxt->input->cur[2] == '-') &&
10211 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010212 if ((!terminate) &&
10213 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10214 goto done;
10215#ifdef DEBUG_PUSH
10216 xmlGenericError(xmlGenericErrorContext,
10217 "PP: Parsing Comment\n");
10218#endif
10219 xmlParseComment(ctxt);
10220 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010221 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010222 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010223 (ctxt->input->cur[2] == 'D') &&
10224 (ctxt->input->cur[3] == 'O') &&
10225 (ctxt->input->cur[4] == 'C') &&
10226 (ctxt->input->cur[5] == 'T') &&
10227 (ctxt->input->cur[6] == 'Y') &&
10228 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010229 (ctxt->input->cur[8] == 'E')) {
10230 if ((!terminate) &&
10231 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10232 goto done;
10233#ifdef DEBUG_PUSH
10234 xmlGenericError(xmlGenericErrorContext,
10235 "PP: Parsing internal subset\n");
10236#endif
10237 ctxt->inSubset = 1;
10238 xmlParseDocTypeDecl(ctxt);
10239 if (RAW == '[') {
10240 ctxt->instate = XML_PARSER_DTD;
10241#ifdef DEBUG_PUSH
10242 xmlGenericError(xmlGenericErrorContext,
10243 "PP: entering DTD\n");
10244#endif
10245 } else {
10246 /*
10247 * Create and update the external subset.
10248 */
10249 ctxt->inSubset = 2;
10250 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10251 (ctxt->sax->externalSubset != NULL))
10252 ctxt->sax->externalSubset(ctxt->userData,
10253 ctxt->intSubName, ctxt->extSubSystem,
10254 ctxt->extSubURI);
10255 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010256 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010257 ctxt->instate = XML_PARSER_PROLOG;
10258#ifdef DEBUG_PUSH
10259 xmlGenericError(xmlGenericErrorContext,
10260 "PP: entering PROLOG\n");
10261#endif
10262 }
10263 } else if ((cur == '<') && (next == '!') &&
10264 (avail < 9)) {
10265 goto done;
10266 } else {
10267 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010268 ctxt->progressive = 1;
10269 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010270#ifdef DEBUG_PUSH
10271 xmlGenericError(xmlGenericErrorContext,
10272 "PP: entering START_TAG\n");
10273#endif
10274 }
10275 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010276 case XML_PARSER_PROLOG:
10277 SKIP_BLANKS;
10278 if (ctxt->input->buf == NULL)
10279 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10280 else
10281 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10282 if (avail < 2)
10283 goto done;
10284 cur = ctxt->input->cur[0];
10285 next = ctxt->input->cur[1];
10286 if ((cur == '<') && (next == '?')) {
10287 if ((!terminate) &&
10288 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10289 goto done;
10290#ifdef DEBUG_PUSH
10291 xmlGenericError(xmlGenericErrorContext,
10292 "PP: Parsing PI\n");
10293#endif
10294 xmlParsePI(ctxt);
10295 } else if ((cur == '<') && (next == '!') &&
10296 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10297 if ((!terminate) &&
10298 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10299 goto done;
10300#ifdef DEBUG_PUSH
10301 xmlGenericError(xmlGenericErrorContext,
10302 "PP: Parsing Comment\n");
10303#endif
10304 xmlParseComment(ctxt);
10305 ctxt->instate = XML_PARSER_PROLOG;
10306 } else if ((cur == '<') && (next == '!') &&
10307 (avail < 4)) {
10308 goto done;
10309 } else {
10310 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010311 if (ctxt->progressive == 0)
10312 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010313 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010314#ifdef DEBUG_PUSH
10315 xmlGenericError(xmlGenericErrorContext,
10316 "PP: entering START_TAG\n");
10317#endif
10318 }
10319 break;
10320 case XML_PARSER_EPILOG:
10321 SKIP_BLANKS;
10322 if (ctxt->input->buf == NULL)
10323 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10324 else
10325 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10326 if (avail < 2)
10327 goto done;
10328 cur = ctxt->input->cur[0];
10329 next = ctxt->input->cur[1];
10330 if ((cur == '<') && (next == '?')) {
10331 if ((!terminate) &&
10332 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10333 goto done;
10334#ifdef DEBUG_PUSH
10335 xmlGenericError(xmlGenericErrorContext,
10336 "PP: Parsing PI\n");
10337#endif
10338 xmlParsePI(ctxt);
10339 ctxt->instate = XML_PARSER_EPILOG;
10340 } else if ((cur == '<') && (next == '!') &&
10341 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10342 if ((!terminate) &&
10343 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10344 goto done;
10345#ifdef DEBUG_PUSH
10346 xmlGenericError(xmlGenericErrorContext,
10347 "PP: Parsing Comment\n");
10348#endif
10349 xmlParseComment(ctxt);
10350 ctxt->instate = XML_PARSER_EPILOG;
10351 } else if ((cur == '<') && (next == '!') &&
10352 (avail < 4)) {
10353 goto done;
10354 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010355 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010356 ctxt->instate = XML_PARSER_EOF;
10357#ifdef DEBUG_PUSH
10358 xmlGenericError(xmlGenericErrorContext,
10359 "PP: entering EOF\n");
10360#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010361 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010362 ctxt->sax->endDocument(ctxt->userData);
10363 goto done;
10364 }
10365 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010366 case XML_PARSER_DTD: {
10367 /*
10368 * Sorry but progressive parsing of the internal subset
10369 * is not expected to be supported. We first check that
10370 * the full content of the internal subset is available and
10371 * the parsing is launched only at that point.
10372 * Internal subset ends up with "']' S? '>'" in an unescaped
10373 * section and not in a ']]>' sequence which are conditional
10374 * sections (whoever argued to keep that crap in XML deserve
10375 * a place in hell !).
10376 */
10377 int base, i;
10378 xmlChar *buf;
10379 xmlChar quote = 0;
10380
10381 base = ctxt->input->cur - ctxt->input->base;
10382 if (base < 0) return(0);
10383 if (ctxt->checkIndex > base)
10384 base = ctxt->checkIndex;
10385 buf = ctxt->input->buf->buffer->content;
10386 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10387 base++) {
10388 if (quote != 0) {
10389 if (buf[base] == quote)
10390 quote = 0;
10391 continue;
10392 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010393 if ((quote == 0) && (buf[base] == '<')) {
10394 int found = 0;
10395 /* special handling of comments */
10396 if (((unsigned int) base + 4 <
10397 ctxt->input->buf->buffer->use) &&
10398 (buf[base + 1] == '!') &&
10399 (buf[base + 2] == '-') &&
10400 (buf[base + 3] == '-')) {
10401 for (;(unsigned int) base + 3 <
10402 ctxt->input->buf->buffer->use; base++) {
10403 if ((buf[base] == '-') &&
10404 (buf[base + 1] == '-') &&
10405 (buf[base + 2] == '>')) {
10406 found = 1;
10407 base += 2;
10408 break;
10409 }
10410 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010411 if (!found) {
10412#if 0
10413 fprintf(stderr, "unfinished comment\n");
10414#endif
10415 break; /* for */
10416 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010417 continue;
10418 }
10419 }
Owen Taylor3473f882001-02-23 17:55:21 +000010420 if (buf[base] == '"') {
10421 quote = '"';
10422 continue;
10423 }
10424 if (buf[base] == '\'') {
10425 quote = '\'';
10426 continue;
10427 }
10428 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010429#if 0
10430 fprintf(stderr, "%c%c%c%c: ", buf[base],
10431 buf[base + 1], buf[base + 2], buf[base + 3]);
10432#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010433 if ((unsigned int) base +1 >=
10434 ctxt->input->buf->buffer->use)
10435 break;
10436 if (buf[base + 1] == ']') {
10437 /* conditional crap, skip both ']' ! */
10438 base++;
10439 continue;
10440 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010441 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010442 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10443 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010444 if (buf[base + i] == '>') {
10445#if 0
10446 fprintf(stderr, "found\n");
10447#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010448 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010449 }
10450 if (!IS_BLANK_CH(buf[base + i])) {
10451#if 0
10452 fprintf(stderr, "not found\n");
10453#endif
10454 goto not_end_of_int_subset;
10455 }
Owen Taylor3473f882001-02-23 17:55:21 +000010456 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010457#if 0
10458 fprintf(stderr, "end of stream\n");
10459#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010460 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010461
Owen Taylor3473f882001-02-23 17:55:21 +000010462 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010463not_end_of_int_subset:
10464 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010465 }
10466 /*
10467 * We didn't found the end of the Internal subset
10468 */
Owen Taylor3473f882001-02-23 17:55:21 +000010469#ifdef DEBUG_PUSH
10470 if (next == 0)
10471 xmlGenericError(xmlGenericErrorContext,
10472 "PP: lookup of int subset end filed\n");
10473#endif
10474 goto done;
10475
10476found_end_int_subset:
10477 xmlParseInternalSubset(ctxt);
10478 ctxt->inSubset = 2;
10479 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10480 (ctxt->sax->externalSubset != NULL))
10481 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10482 ctxt->extSubSystem, ctxt->extSubURI);
10483 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010484 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010485 ctxt->instate = XML_PARSER_PROLOG;
10486 ctxt->checkIndex = 0;
10487#ifdef DEBUG_PUSH
10488 xmlGenericError(xmlGenericErrorContext,
10489 "PP: entering PROLOG\n");
10490#endif
10491 break;
10492 }
10493 case XML_PARSER_COMMENT:
10494 xmlGenericError(xmlGenericErrorContext,
10495 "PP: internal error, state == COMMENT\n");
10496 ctxt->instate = XML_PARSER_CONTENT;
10497#ifdef DEBUG_PUSH
10498 xmlGenericError(xmlGenericErrorContext,
10499 "PP: entering CONTENT\n");
10500#endif
10501 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010502 case XML_PARSER_IGNORE:
10503 xmlGenericError(xmlGenericErrorContext,
10504 "PP: internal error, state == IGNORE");
10505 ctxt->instate = XML_PARSER_DTD;
10506#ifdef DEBUG_PUSH
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: entering DTD\n");
10509#endif
10510 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010511 case XML_PARSER_PI:
10512 xmlGenericError(xmlGenericErrorContext,
10513 "PP: internal error, state == PI\n");
10514 ctxt->instate = XML_PARSER_CONTENT;
10515#ifdef DEBUG_PUSH
10516 xmlGenericError(xmlGenericErrorContext,
10517 "PP: entering CONTENT\n");
10518#endif
10519 break;
10520 case XML_PARSER_ENTITY_DECL:
10521 xmlGenericError(xmlGenericErrorContext,
10522 "PP: internal error, state == ENTITY_DECL\n");
10523 ctxt->instate = XML_PARSER_DTD;
10524#ifdef DEBUG_PUSH
10525 xmlGenericError(xmlGenericErrorContext,
10526 "PP: entering DTD\n");
10527#endif
10528 break;
10529 case XML_PARSER_ENTITY_VALUE:
10530 xmlGenericError(xmlGenericErrorContext,
10531 "PP: internal error, state == ENTITY_VALUE\n");
10532 ctxt->instate = XML_PARSER_CONTENT;
10533#ifdef DEBUG_PUSH
10534 xmlGenericError(xmlGenericErrorContext,
10535 "PP: entering DTD\n");
10536#endif
10537 break;
10538 case XML_PARSER_ATTRIBUTE_VALUE:
10539 xmlGenericError(xmlGenericErrorContext,
10540 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10541 ctxt->instate = XML_PARSER_START_TAG;
10542#ifdef DEBUG_PUSH
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: entering START_TAG\n");
10545#endif
10546 break;
10547 case XML_PARSER_SYSTEM_LITERAL:
10548 xmlGenericError(xmlGenericErrorContext,
10549 "PP: internal error, state == SYSTEM_LITERAL\n");
10550 ctxt->instate = XML_PARSER_START_TAG;
10551#ifdef DEBUG_PUSH
10552 xmlGenericError(xmlGenericErrorContext,
10553 "PP: entering START_TAG\n");
10554#endif
10555 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010556 case XML_PARSER_PUBLIC_LITERAL:
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: internal error, state == PUBLIC_LITERAL\n");
10559 ctxt->instate = XML_PARSER_START_TAG;
10560#ifdef DEBUG_PUSH
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: entering START_TAG\n");
10563#endif
10564 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010565 }
10566 }
10567done:
10568#ifdef DEBUG_PUSH
10569 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10570#endif
10571 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010572encoding_error:
10573 {
10574 char buffer[150];
10575
10576 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10577 ctxt->input->cur[0], ctxt->input->cur[1],
10578 ctxt->input->cur[2], ctxt->input->cur[3]);
10579 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10580 "Input is not proper UTF-8, indicate encoding !\n%s",
10581 BAD_CAST buffer, NULL);
10582 }
10583 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010584}
10585
10586/**
Owen Taylor3473f882001-02-23 17:55:21 +000010587 * xmlParseChunk:
10588 * @ctxt: an XML parser context
10589 * @chunk: an char array
10590 * @size: the size in byte of the chunk
10591 * @terminate: last chunk indicator
10592 *
10593 * Parse a Chunk of memory
10594 *
10595 * Returns zero if no error, the xmlParserErrors otherwise.
10596 */
10597int
10598xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10599 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010600 int end_in_lf = 0;
10601
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010602 if (ctxt == NULL)
10603 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010604 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010605 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010606 if (ctxt->instate == XML_PARSER_START)
10607 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010608 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10609 (chunk[size - 1] == '\r')) {
10610 end_in_lf = 1;
10611 size--;
10612 }
Owen Taylor3473f882001-02-23 17:55:21 +000010613 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10614 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10615 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10616 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010617 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010618
William M. Bracka3215c72004-07-31 16:24:01 +000010619 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10620 if (res < 0) {
10621 ctxt->errNo = XML_PARSER_EOF;
10622 ctxt->disableSAX = 1;
10623 return (XML_PARSER_EOF);
10624 }
Owen Taylor3473f882001-02-23 17:55:21 +000010625 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10626 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010627 ctxt->input->end =
10628 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010629#ifdef DEBUG_PUSH
10630 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10631#endif
10632
Owen Taylor3473f882001-02-23 17:55:21 +000010633 } else if (ctxt->instate != XML_PARSER_EOF) {
10634 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10635 xmlParserInputBufferPtr in = ctxt->input->buf;
10636 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10637 (in->raw != NULL)) {
10638 int nbchars;
10639
10640 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10641 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010642 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010643 xmlGenericError(xmlGenericErrorContext,
10644 "xmlParseChunk: encoder error\n");
10645 return(XML_ERR_INVALID_ENCODING);
10646 }
10647 }
10648 }
10649 }
10650 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010651 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10652 (ctxt->input->buf != NULL)) {
10653 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10654 }
Daniel Veillard14412512005-01-21 23:53:26 +000010655 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010656 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010657 if (terminate) {
10658 /*
10659 * Check for termination
10660 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010661 int avail = 0;
10662
10663 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010664 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010665 avail = ctxt->input->length -
10666 (ctxt->input->cur - ctxt->input->base);
10667 else
10668 avail = ctxt->input->buf->buffer->use -
10669 (ctxt->input->cur - ctxt->input->base);
10670 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010671
Owen Taylor3473f882001-02-23 17:55:21 +000010672 if ((ctxt->instate != XML_PARSER_EOF) &&
10673 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010674 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010675 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010676 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010677 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010678 }
Owen Taylor3473f882001-02-23 17:55:21 +000010679 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010680 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010681 ctxt->sax->endDocument(ctxt->userData);
10682 }
10683 ctxt->instate = XML_PARSER_EOF;
10684 }
10685 return((xmlParserErrors) ctxt->errNo);
10686}
10687
10688/************************************************************************
10689 * *
10690 * I/O front end functions to the parser *
10691 * *
10692 ************************************************************************/
10693
10694/**
Owen Taylor3473f882001-02-23 17:55:21 +000010695 * xmlCreatePushParserCtxt:
10696 * @sax: a SAX handler
10697 * @user_data: The user data returned on SAX callbacks
10698 * @chunk: a pointer to an array of chars
10699 * @size: number of chars in the array
10700 * @filename: an optional file name or URI
10701 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010702 * Create a parser context for using the XML parser in push mode.
10703 * If @buffer and @size are non-NULL, the data is used to detect
10704 * the encoding. The remaining characters will be parsed so they
10705 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010706 * To allow content encoding detection, @size should be >= 4
10707 * The value of @filename is used for fetching external entities
10708 * and error/warning reports.
10709 *
10710 * Returns the new parser context or NULL
10711 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010712
Owen Taylor3473f882001-02-23 17:55:21 +000010713xmlParserCtxtPtr
10714xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10715 const char *chunk, int size, const char *filename) {
10716 xmlParserCtxtPtr ctxt;
10717 xmlParserInputPtr inputStream;
10718 xmlParserInputBufferPtr buf;
10719 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10720
10721 /*
10722 * plug some encoding conversion routines
10723 */
10724 if ((chunk != NULL) && (size >= 4))
10725 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10726
10727 buf = xmlAllocParserInputBuffer(enc);
10728 if (buf == NULL) return(NULL);
10729
10730 ctxt = xmlNewParserCtxt();
10731 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010732 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010733 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010734 return(NULL);
10735 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010736 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010737 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10738 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010739 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010740 xmlFreeParserInputBuffer(buf);
10741 xmlFreeParserCtxt(ctxt);
10742 return(NULL);
10743 }
Owen Taylor3473f882001-02-23 17:55:21 +000010744 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010745#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010746 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010747#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010748 xmlFree(ctxt->sax);
10749 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10750 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010751 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010752 xmlFreeParserInputBuffer(buf);
10753 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010754 return(NULL);
10755 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010756 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10757 if (sax->initialized == XML_SAX2_MAGIC)
10758 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10759 else
10760 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010761 if (user_data != NULL)
10762 ctxt->userData = user_data;
10763 }
10764 if (filename == NULL) {
10765 ctxt->directory = NULL;
10766 } else {
10767 ctxt->directory = xmlParserGetDirectory(filename);
10768 }
10769
10770 inputStream = xmlNewInputStream(ctxt);
10771 if (inputStream == NULL) {
10772 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010773 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010774 return(NULL);
10775 }
10776
10777 if (filename == NULL)
10778 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010779 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010780 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010781 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010782 if (inputStream->filename == NULL) {
10783 xmlFreeParserCtxt(ctxt);
10784 xmlFreeParserInputBuffer(buf);
10785 return(NULL);
10786 }
10787 }
Owen Taylor3473f882001-02-23 17:55:21 +000010788 inputStream->buf = buf;
10789 inputStream->base = inputStream->buf->buffer->content;
10790 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010791 inputStream->end =
10792 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010793
10794 inputPush(ctxt, inputStream);
10795
William M. Brack3a1cd212005-02-11 14:35:54 +000010796 /*
10797 * If the caller didn't provide an initial 'chunk' for determining
10798 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10799 * that it can be automatically determined later
10800 */
10801 if ((size == 0) || (chunk == NULL)) {
10802 ctxt->charset = XML_CHAR_ENCODING_NONE;
10803 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010804 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10805 int cur = ctxt->input->cur - ctxt->input->base;
10806
Owen Taylor3473f882001-02-23 17:55:21 +000010807 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010808
10809 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10810 ctxt->input->cur = ctxt->input->base + cur;
10811 ctxt->input->end =
10812 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010813#ifdef DEBUG_PUSH
10814 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10815#endif
10816 }
10817
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010818 if (enc != XML_CHAR_ENCODING_NONE) {
10819 xmlSwitchEncoding(ctxt, enc);
10820 }
10821
Owen Taylor3473f882001-02-23 17:55:21 +000010822 return(ctxt);
10823}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010824#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010825
10826/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010827 * xmlStopParser:
10828 * @ctxt: an XML parser context
10829 *
10830 * Blocks further parser processing
10831 */
10832void
10833xmlStopParser(xmlParserCtxtPtr ctxt) {
10834 if (ctxt == NULL)
10835 return;
10836 ctxt->instate = XML_PARSER_EOF;
10837 ctxt->disableSAX = 1;
10838 if (ctxt->input != NULL) {
10839 ctxt->input->cur = BAD_CAST"";
10840 ctxt->input->base = ctxt->input->cur;
10841 }
10842}
10843
10844/**
Owen Taylor3473f882001-02-23 17:55:21 +000010845 * xmlCreateIOParserCtxt:
10846 * @sax: a SAX handler
10847 * @user_data: The user data returned on SAX callbacks
10848 * @ioread: an I/O read function
10849 * @ioclose: an I/O close function
10850 * @ioctx: an I/O handler
10851 * @enc: the charset encoding if known
10852 *
10853 * Create a parser context for using the XML parser with an existing
10854 * I/O stream
10855 *
10856 * Returns the new parser context or NULL
10857 */
10858xmlParserCtxtPtr
10859xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10860 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10861 void *ioctx, xmlCharEncoding enc) {
10862 xmlParserCtxtPtr ctxt;
10863 xmlParserInputPtr inputStream;
10864 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010865
10866 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010867
10868 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10869 if (buf == NULL) return(NULL);
10870
10871 ctxt = xmlNewParserCtxt();
10872 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010873 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010874 return(NULL);
10875 }
10876 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010877#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010878 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010879#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010880 xmlFree(ctxt->sax);
10881 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10882 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010883 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010884 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010885 return(NULL);
10886 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010887 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10888 if (sax->initialized == XML_SAX2_MAGIC)
10889 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10890 else
10891 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010892 if (user_data != NULL)
10893 ctxt->userData = user_data;
10894 }
10895
10896 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10897 if (inputStream == NULL) {
10898 xmlFreeParserCtxt(ctxt);
10899 return(NULL);
10900 }
10901 inputPush(ctxt, inputStream);
10902
10903 return(ctxt);
10904}
10905
Daniel Veillard4432df22003-09-28 18:58:27 +000010906#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010907/************************************************************************
10908 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010909 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010910 * *
10911 ************************************************************************/
10912
10913/**
10914 * xmlIOParseDTD:
10915 * @sax: the SAX handler block or NULL
10916 * @input: an Input Buffer
10917 * @enc: the charset encoding if known
10918 *
10919 * Load and parse a DTD
10920 *
10921 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010922 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010923 */
10924
10925xmlDtdPtr
10926xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10927 xmlCharEncoding enc) {
10928 xmlDtdPtr ret = NULL;
10929 xmlParserCtxtPtr ctxt;
10930 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010931 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010932
10933 if (input == NULL)
10934 return(NULL);
10935
10936 ctxt = xmlNewParserCtxt();
10937 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010938 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010939 return(NULL);
10940 }
10941
10942 /*
10943 * Set-up the SAX context
10944 */
10945 if (sax != NULL) {
10946 if (ctxt->sax != NULL)
10947 xmlFree(ctxt->sax);
10948 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010949 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010950 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010951 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010952
10953 /*
10954 * generate a parser input from the I/O handler
10955 */
10956
Daniel Veillard43caefb2003-12-07 19:32:22 +000010957 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010958 if (pinput == NULL) {
10959 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010960 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010961 xmlFreeParserCtxt(ctxt);
10962 return(NULL);
10963 }
10964
10965 /*
10966 * plug some encoding conversion routines here.
10967 */
10968 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010969 if (enc != XML_CHAR_ENCODING_NONE) {
10970 xmlSwitchEncoding(ctxt, enc);
10971 }
Owen Taylor3473f882001-02-23 17:55:21 +000010972
10973 pinput->filename = NULL;
10974 pinput->line = 1;
10975 pinput->col = 1;
10976 pinput->base = ctxt->input->cur;
10977 pinput->cur = ctxt->input->cur;
10978 pinput->free = NULL;
10979
10980 /*
10981 * let's parse that entity knowing it's an external subset.
10982 */
10983 ctxt->inSubset = 2;
10984 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10985 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10986 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010987
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010988 if ((enc == XML_CHAR_ENCODING_NONE) &&
10989 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010990 /*
10991 * Get the 4 first bytes and decode the charset
10992 * if enc != XML_CHAR_ENCODING_NONE
10993 * plug some encoding conversion routines.
10994 */
10995 start[0] = RAW;
10996 start[1] = NXT(1);
10997 start[2] = NXT(2);
10998 start[3] = NXT(3);
10999 enc = xmlDetectCharEncoding(start, 4);
11000 if (enc != XML_CHAR_ENCODING_NONE) {
11001 xmlSwitchEncoding(ctxt, enc);
11002 }
11003 }
11004
Owen Taylor3473f882001-02-23 17:55:21 +000011005 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11006
11007 if (ctxt->myDoc != NULL) {
11008 if (ctxt->wellFormed) {
11009 ret = ctxt->myDoc->extSubset;
11010 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011011 if (ret != NULL) {
11012 xmlNodePtr tmp;
11013
11014 ret->doc = NULL;
11015 tmp = ret->children;
11016 while (tmp != NULL) {
11017 tmp->doc = NULL;
11018 tmp = tmp->next;
11019 }
11020 }
Owen Taylor3473f882001-02-23 17:55:21 +000011021 } else {
11022 ret = NULL;
11023 }
11024 xmlFreeDoc(ctxt->myDoc);
11025 ctxt->myDoc = NULL;
11026 }
11027 if (sax != NULL) ctxt->sax = NULL;
11028 xmlFreeParserCtxt(ctxt);
11029
11030 return(ret);
11031}
11032
11033/**
11034 * xmlSAXParseDTD:
11035 * @sax: the SAX handler block
11036 * @ExternalID: a NAME* containing the External ID of the DTD
11037 * @SystemID: a NAME* containing the URL to the DTD
11038 *
11039 * Load and parse an external subset.
11040 *
11041 * Returns the resulting xmlDtdPtr or NULL in case of error.
11042 */
11043
11044xmlDtdPtr
11045xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11046 const xmlChar *SystemID) {
11047 xmlDtdPtr ret = NULL;
11048 xmlParserCtxtPtr ctxt;
11049 xmlParserInputPtr input = NULL;
11050 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011051 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011052
11053 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11054
11055 ctxt = xmlNewParserCtxt();
11056 if (ctxt == NULL) {
11057 return(NULL);
11058 }
11059
11060 /*
11061 * Set-up the SAX context
11062 */
11063 if (sax != NULL) {
11064 if (ctxt->sax != NULL)
11065 xmlFree(ctxt->sax);
11066 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011067 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011068 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011069
11070 /*
11071 * Canonicalise the system ID
11072 */
11073 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011074 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011075 xmlFreeParserCtxt(ctxt);
11076 return(NULL);
11077 }
Owen Taylor3473f882001-02-23 17:55:21 +000011078
11079 /*
11080 * Ask the Entity resolver to load the damn thing
11081 */
11082
11083 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011084 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11085 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011086 if (input == NULL) {
11087 if (sax != NULL) ctxt->sax = NULL;
11088 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011089 if (systemIdCanonic != NULL)
11090 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011091 return(NULL);
11092 }
11093
11094 /*
11095 * plug some encoding conversion routines here.
11096 */
11097 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011098 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11099 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11100 xmlSwitchEncoding(ctxt, enc);
11101 }
Owen Taylor3473f882001-02-23 17:55:21 +000011102
11103 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011104 input->filename = (char *) systemIdCanonic;
11105 else
11106 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011107 input->line = 1;
11108 input->col = 1;
11109 input->base = ctxt->input->cur;
11110 input->cur = ctxt->input->cur;
11111 input->free = NULL;
11112
11113 /*
11114 * let's parse that entity knowing it's an external subset.
11115 */
11116 ctxt->inSubset = 2;
11117 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11118 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11119 ExternalID, SystemID);
11120 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11121
11122 if (ctxt->myDoc != NULL) {
11123 if (ctxt->wellFormed) {
11124 ret = ctxt->myDoc->extSubset;
11125 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011126 if (ret != NULL) {
11127 xmlNodePtr tmp;
11128
11129 ret->doc = NULL;
11130 tmp = ret->children;
11131 while (tmp != NULL) {
11132 tmp->doc = NULL;
11133 tmp = tmp->next;
11134 }
11135 }
Owen Taylor3473f882001-02-23 17:55:21 +000011136 } else {
11137 ret = NULL;
11138 }
11139 xmlFreeDoc(ctxt->myDoc);
11140 ctxt->myDoc = NULL;
11141 }
11142 if (sax != NULL) ctxt->sax = NULL;
11143 xmlFreeParserCtxt(ctxt);
11144
11145 return(ret);
11146}
11147
Daniel Veillard4432df22003-09-28 18:58:27 +000011148
Owen Taylor3473f882001-02-23 17:55:21 +000011149/**
11150 * xmlParseDTD:
11151 * @ExternalID: a NAME* containing the External ID of the DTD
11152 * @SystemID: a NAME* containing the URL to the DTD
11153 *
11154 * Load and parse an external subset.
11155 *
11156 * Returns the resulting xmlDtdPtr or NULL in case of error.
11157 */
11158
11159xmlDtdPtr
11160xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11161 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11162}
Daniel Veillard4432df22003-09-28 18:58:27 +000011163#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011164
11165/************************************************************************
11166 * *
11167 * Front ends when parsing an Entity *
11168 * *
11169 ************************************************************************/
11170
11171/**
Owen Taylor3473f882001-02-23 17:55:21 +000011172 * xmlParseCtxtExternalEntity:
11173 * @ctx: the existing parsing context
11174 * @URL: the URL for the entity to load
11175 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011176 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011177 *
11178 * Parse an external general entity within an existing parsing context
11179 * An external general parsed entity is well-formed if it matches the
11180 * production labeled extParsedEnt.
11181 *
11182 * [78] extParsedEnt ::= TextDecl? content
11183 *
11184 * Returns 0 if the entity is well formed, -1 in case of args problem and
11185 * the parser error code otherwise
11186 */
11187
11188int
11189xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011190 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011191 xmlParserCtxtPtr ctxt;
11192 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011193 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011194 xmlSAXHandlerPtr oldsax = NULL;
11195 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011196 xmlChar start[4];
11197 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011198 xmlParserInputPtr inputStream;
11199 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011200
Daniel Veillardce682bc2004-11-05 17:22:25 +000011201 if (ctx == NULL) return(-1);
11202
Owen Taylor3473f882001-02-23 17:55:21 +000011203 if (ctx->depth > 40) {
11204 return(XML_ERR_ENTITY_LOOP);
11205 }
11206
Daniel Veillardcda96922001-08-21 10:56:31 +000011207 if (lst != NULL)
11208 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011209 if ((URL == NULL) && (ID == NULL))
11210 return(-1);
11211 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11212 return(-1);
11213
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011214 ctxt = xmlNewParserCtxt();
11215 if (ctxt == NULL) {
11216 return(-1);
11217 }
11218
Owen Taylor3473f882001-02-23 17:55:21 +000011219 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011220 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011221
11222 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11223 if (inputStream == NULL) {
11224 xmlFreeParserCtxt(ctxt);
11225 return(-1);
11226 }
11227
11228 inputPush(ctxt, inputStream);
11229
11230 if ((ctxt->directory == NULL) && (directory == NULL))
11231 directory = xmlParserGetDirectory((char *)URL);
11232 if ((ctxt->directory == NULL) && (directory != NULL))
11233 ctxt->directory = directory;
11234
Owen Taylor3473f882001-02-23 17:55:21 +000011235 oldsax = ctxt->sax;
11236 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011237 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011238 newDoc = xmlNewDoc(BAD_CAST "1.0");
11239 if (newDoc == NULL) {
11240 xmlFreeParserCtxt(ctxt);
11241 return(-1);
11242 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011243 if (ctx->myDoc->dict) {
11244 newDoc->dict = ctx->myDoc->dict;
11245 xmlDictReference(newDoc->dict);
11246 }
Owen Taylor3473f882001-02-23 17:55:21 +000011247 if (ctx->myDoc != NULL) {
11248 newDoc->intSubset = ctx->myDoc->intSubset;
11249 newDoc->extSubset = ctx->myDoc->extSubset;
11250 }
11251 if (ctx->myDoc->URL != NULL) {
11252 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11253 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011254 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11255 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011256 ctxt->sax = oldsax;
11257 xmlFreeParserCtxt(ctxt);
11258 newDoc->intSubset = NULL;
11259 newDoc->extSubset = NULL;
11260 xmlFreeDoc(newDoc);
11261 return(-1);
11262 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011263 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 nodePush(ctxt, newDoc->children);
11265 if (ctx->myDoc == NULL) {
11266 ctxt->myDoc = newDoc;
11267 } else {
11268 ctxt->myDoc = ctx->myDoc;
11269 newDoc->children->doc = ctx->myDoc;
11270 }
11271
Daniel Veillard87a764e2001-06-20 17:41:10 +000011272 /*
11273 * Get the 4 first bytes and decode the charset
11274 * if enc != XML_CHAR_ENCODING_NONE
11275 * plug some encoding conversion routines.
11276 */
11277 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011278 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11279 start[0] = RAW;
11280 start[1] = NXT(1);
11281 start[2] = NXT(2);
11282 start[3] = NXT(3);
11283 enc = xmlDetectCharEncoding(start, 4);
11284 if (enc != XML_CHAR_ENCODING_NONE) {
11285 xmlSwitchEncoding(ctxt, enc);
11286 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011287 }
11288
Owen Taylor3473f882001-02-23 17:55:21 +000011289 /*
11290 * Parse a possible text declaration first
11291 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011292 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011293 xmlParseTextDecl(ctxt);
11294 }
11295
11296 /*
11297 * Doing validity checking on chunk doesn't make sense
11298 */
11299 ctxt->instate = XML_PARSER_CONTENT;
11300 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011301 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011302 ctxt->loadsubset = ctx->loadsubset;
11303 ctxt->depth = ctx->depth + 1;
11304 ctxt->replaceEntities = ctx->replaceEntities;
11305 if (ctxt->validate) {
11306 ctxt->vctxt.error = ctx->vctxt.error;
11307 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011308 } else {
11309 ctxt->vctxt.error = NULL;
11310 ctxt->vctxt.warning = NULL;
11311 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011312 ctxt->vctxt.nodeTab = NULL;
11313 ctxt->vctxt.nodeNr = 0;
11314 ctxt->vctxt.nodeMax = 0;
11315 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011316 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11317 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011318 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11319 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11320 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011321 ctxt->dictNames = ctx->dictNames;
11322 ctxt->attsDefault = ctx->attsDefault;
11323 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011324 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011325
11326 xmlParseContent(ctxt);
11327
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011328 ctx->validate = ctxt->validate;
11329 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011330 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011331 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011332 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011333 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011334 }
11335 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011336 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011337 }
11338
11339 if (!ctxt->wellFormed) {
11340 if (ctxt->errNo == 0)
11341 ret = 1;
11342 else
11343 ret = ctxt->errNo;
11344 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011345 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011346 xmlNodePtr cur;
11347
11348 /*
11349 * Return the newly created nodeset after unlinking it from
11350 * they pseudo parent.
11351 */
11352 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011353 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011354 while (cur != NULL) {
11355 cur->parent = NULL;
11356 cur = cur->next;
11357 }
11358 newDoc->children->children = NULL;
11359 }
11360 ret = 0;
11361 }
11362 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011363 ctxt->dict = NULL;
11364 ctxt->attsDefault = NULL;
11365 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011366 xmlFreeParserCtxt(ctxt);
11367 newDoc->intSubset = NULL;
11368 newDoc->extSubset = NULL;
11369 xmlFreeDoc(newDoc);
11370
11371 return(ret);
11372}
11373
11374/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011375 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011376 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011377 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011378 * @sax: the SAX handler bloc (possibly NULL)
11379 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11380 * @depth: Used for loop detection, use 0
11381 * @URL: the URL for the entity to load
11382 * @ID: the System ID for the entity to load
11383 * @list: the return value for the set of parsed nodes
11384 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011385 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011386 *
11387 * Returns 0 if the entity is well formed, -1 in case of args problem and
11388 * the parser error code otherwise
11389 */
11390
Daniel Veillard7d515752003-09-26 19:12:37 +000011391static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011392xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11393 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011394 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011395 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011396 xmlParserCtxtPtr ctxt;
11397 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011398 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011399 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011400 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011401 xmlChar start[4];
11402 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011403
11404 if (depth > 40) {
11405 return(XML_ERR_ENTITY_LOOP);
11406 }
11407
11408
11409
11410 if (list != NULL)
11411 *list = NULL;
11412 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011413 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011414 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011415 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011416
11417
11418 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011419 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011420 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011421 if (oldctxt != NULL) {
11422 ctxt->_private = oldctxt->_private;
11423 ctxt->loadsubset = oldctxt->loadsubset;
11424 ctxt->validate = oldctxt->validate;
11425 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011426 ctxt->record_info = oldctxt->record_info;
11427 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11428 ctxt->node_seq.length = oldctxt->node_seq.length;
11429 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011430 } else {
11431 /*
11432 * Doing validity checking on chunk without context
11433 * doesn't make sense
11434 */
11435 ctxt->_private = NULL;
11436 ctxt->validate = 0;
11437 ctxt->external = 2;
11438 ctxt->loadsubset = 0;
11439 }
Owen Taylor3473f882001-02-23 17:55:21 +000011440 if (sax != NULL) {
11441 oldsax = ctxt->sax;
11442 ctxt->sax = sax;
11443 if (user_data != NULL)
11444 ctxt->userData = user_data;
11445 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011446 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011447 newDoc = xmlNewDoc(BAD_CAST "1.0");
11448 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011449 ctxt->node_seq.maximum = 0;
11450 ctxt->node_seq.length = 0;
11451 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011452 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011453 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011454 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011455 newDoc->intSubset = doc->intSubset;
11456 newDoc->extSubset = doc->extSubset;
11457 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011458 xmlDictReference(newDoc->dict);
11459
Owen Taylor3473f882001-02-23 17:55:21 +000011460 if (doc->URL != NULL) {
11461 newDoc->URL = xmlStrdup(doc->URL);
11462 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011463 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11464 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011465 if (sax != NULL)
11466 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011467 ctxt->node_seq.maximum = 0;
11468 ctxt->node_seq.length = 0;
11469 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011470 xmlFreeParserCtxt(ctxt);
11471 newDoc->intSubset = NULL;
11472 newDoc->extSubset = NULL;
11473 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011474 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011475 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011476 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011477 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011478 ctxt->myDoc = doc;
11479 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011480
Daniel Veillard87a764e2001-06-20 17:41:10 +000011481 /*
11482 * Get the 4 first bytes and decode the charset
11483 * if enc != XML_CHAR_ENCODING_NONE
11484 * plug some encoding conversion routines.
11485 */
11486 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011487 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11488 start[0] = RAW;
11489 start[1] = NXT(1);
11490 start[2] = NXT(2);
11491 start[3] = NXT(3);
11492 enc = xmlDetectCharEncoding(start, 4);
11493 if (enc != XML_CHAR_ENCODING_NONE) {
11494 xmlSwitchEncoding(ctxt, enc);
11495 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011496 }
11497
Owen Taylor3473f882001-02-23 17:55:21 +000011498 /*
11499 * Parse a possible text declaration first
11500 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011501 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011502 xmlParseTextDecl(ctxt);
11503 }
11504
Owen Taylor3473f882001-02-23 17:55:21 +000011505 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011506 ctxt->depth = depth;
11507
11508 xmlParseContent(ctxt);
11509
Daniel Veillard561b7f82002-03-20 21:55:57 +000011510 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011511 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011512 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011513 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011514 }
11515 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011516 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011517 }
11518
11519 if (!ctxt->wellFormed) {
11520 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011521 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011522 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011523 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011524 } else {
11525 if (list != NULL) {
11526 xmlNodePtr cur;
11527
11528 /*
11529 * Return the newly created nodeset after unlinking it from
11530 * they pseudo parent.
11531 */
11532 cur = newDoc->children->children;
11533 *list = cur;
11534 while (cur != NULL) {
11535 cur->parent = NULL;
11536 cur = cur->next;
11537 }
11538 newDoc->children->children = NULL;
11539 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011540 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011541 }
11542 if (sax != NULL)
11543 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011544 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11545 oldctxt->node_seq.length = ctxt->node_seq.length;
11546 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011547 ctxt->node_seq.maximum = 0;
11548 ctxt->node_seq.length = 0;
11549 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011550 xmlFreeParserCtxt(ctxt);
11551 newDoc->intSubset = NULL;
11552 newDoc->extSubset = NULL;
11553 xmlFreeDoc(newDoc);
11554
11555 return(ret);
11556}
11557
Daniel Veillard81273902003-09-30 00:43:48 +000011558#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011559/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011560 * xmlParseExternalEntity:
11561 * @doc: the document the chunk pertains to
11562 * @sax: the SAX handler bloc (possibly NULL)
11563 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11564 * @depth: Used for loop detection, use 0
11565 * @URL: the URL for the entity to load
11566 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011567 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011568 *
11569 * Parse an external general entity
11570 * An external general parsed entity is well-formed if it matches the
11571 * production labeled extParsedEnt.
11572 *
11573 * [78] extParsedEnt ::= TextDecl? content
11574 *
11575 * Returns 0 if the entity is well formed, -1 in case of args problem and
11576 * the parser error code otherwise
11577 */
11578
11579int
11580xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011581 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011582 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011583 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011584}
11585
11586/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011587 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011588 * @doc: the document the chunk pertains to
11589 * @sax: the SAX handler bloc (possibly NULL)
11590 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11591 * @depth: Used for loop detection, use 0
11592 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011593 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011594 *
11595 * Parse a well-balanced chunk of an XML document
11596 * called by the parser
11597 * The allowed sequence for the Well Balanced Chunk is the one defined by
11598 * the content production in the XML grammar:
11599 *
11600 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11601 *
11602 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11603 * the parser error code otherwise
11604 */
11605
11606int
11607xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011608 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011609 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11610 depth, string, lst, 0 );
11611}
Daniel Veillard81273902003-09-30 00:43:48 +000011612#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011613
11614/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011615 * xmlParseBalancedChunkMemoryInternal:
11616 * @oldctxt: the existing parsing context
11617 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11618 * @user_data: the user data field for the parser context
11619 * @lst: the return value for the set of parsed nodes
11620 *
11621 *
11622 * Parse a well-balanced chunk of an XML document
11623 * called by the parser
11624 * The allowed sequence for the Well Balanced Chunk is the one defined by
11625 * the content production in the XML grammar:
11626 *
11627 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11628 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011629 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11630 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011631 *
11632 * In case recover is set to 1, the nodelist will not be empty even if
11633 * the parsed chunk is not well balanced.
11634 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011635static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011636xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11637 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11638 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011639 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011640 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011641 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011642 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011643 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011644 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011645 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011646
11647 if (oldctxt->depth > 40) {
11648 return(XML_ERR_ENTITY_LOOP);
11649 }
11650
11651
11652 if (lst != NULL)
11653 *lst = NULL;
11654 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011655 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011656
11657 size = xmlStrlen(string);
11658
11659 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011660 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011661 if (user_data != NULL)
11662 ctxt->userData = user_data;
11663 else
11664 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011665 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11666 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011667 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11668 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11669 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011670
11671 oldsax = ctxt->sax;
11672 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011673 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011674 ctxt->replaceEntities = oldctxt->replaceEntities;
11675 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011676
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011677 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011678 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011679 newDoc = xmlNewDoc(BAD_CAST "1.0");
11680 if (newDoc == NULL) {
11681 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011682 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011683 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011684 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011685 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011686 newDoc->dict = ctxt->dict;
11687 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011688 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011689 } else {
11690 ctxt->myDoc = oldctxt->myDoc;
11691 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011692 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011693 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011694 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11695 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011696 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011697 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011698 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011699 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011700 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011701 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011702 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011703 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011704 ctxt->myDoc->children = NULL;
11705 ctxt->myDoc->last = NULL;
11706 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011707 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011708 ctxt->instate = XML_PARSER_CONTENT;
11709 ctxt->depth = oldctxt->depth + 1;
11710
Daniel Veillard328f48c2002-11-15 15:24:34 +000011711 ctxt->validate = 0;
11712 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011713 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11714 /*
11715 * ID/IDREF registration will be done in xmlValidateElement below
11716 */
11717 ctxt->loadsubset |= XML_SKIP_IDS;
11718 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011719 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011720 ctxt->attsDefault = oldctxt->attsDefault;
11721 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011722
Daniel Veillard68e9e742002-11-16 15:35:11 +000011723 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011724 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011725 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011726 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011727 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011728 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011729 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011730 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011731 }
11732
11733 if (!ctxt->wellFormed) {
11734 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011735 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011736 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011737 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011738 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011739 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011740 }
11741
William M. Brack7b9154b2003-09-27 19:23:50 +000011742 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011743 xmlNodePtr cur;
11744
11745 /*
11746 * Return the newly created nodeset after unlinking it from
11747 * they pseudo parent.
11748 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011749 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011750 *lst = cur;
11751 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011752#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011753 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11754 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11755 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011756 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11757 oldctxt->myDoc, cur);
11758 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011759#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011760 cur->parent = NULL;
11761 cur = cur->next;
11762 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011763 ctxt->myDoc->children->children = NULL;
11764 }
11765 if (ctxt->myDoc != NULL) {
11766 xmlFreeNode(ctxt->myDoc->children);
11767 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011768 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011769 }
11770
11771 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011772 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011773 ctxt->attsDefault = NULL;
11774 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011775 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011776 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011777 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011778 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011779
11780 return(ret);
11781}
11782
Daniel Veillard29b17482004-08-16 00:39:03 +000011783/**
11784 * xmlParseInNodeContext:
11785 * @node: the context node
11786 * @data: the input string
11787 * @datalen: the input string length in bytes
11788 * @options: a combination of xmlParserOption
11789 * @lst: the return value for the set of parsed nodes
11790 *
11791 * Parse a well-balanced chunk of an XML document
11792 * within the context (DTD, namespaces, etc ...) of the given node.
11793 *
11794 * The allowed sequence for the data is a Well Balanced Chunk defined by
11795 * the content production in the XML grammar:
11796 *
11797 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11798 *
11799 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11800 * error code otherwise
11801 */
11802xmlParserErrors
11803xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11804 int options, xmlNodePtr *lst) {
11805#ifdef SAX2
11806 xmlParserCtxtPtr ctxt;
11807 xmlDocPtr doc = NULL;
11808 xmlNodePtr fake, cur;
11809 int nsnr = 0;
11810
11811 xmlParserErrors ret = XML_ERR_OK;
11812
11813 /*
11814 * check all input parameters, grab the document
11815 */
11816 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11817 return(XML_ERR_INTERNAL_ERROR);
11818 switch (node->type) {
11819 case XML_ELEMENT_NODE:
11820 case XML_ATTRIBUTE_NODE:
11821 case XML_TEXT_NODE:
11822 case XML_CDATA_SECTION_NODE:
11823 case XML_ENTITY_REF_NODE:
11824 case XML_PI_NODE:
11825 case XML_COMMENT_NODE:
11826 case XML_DOCUMENT_NODE:
11827 case XML_HTML_DOCUMENT_NODE:
11828 break;
11829 default:
11830 return(XML_ERR_INTERNAL_ERROR);
11831
11832 }
11833 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11834 (node->type != XML_DOCUMENT_NODE) &&
11835 (node->type != XML_HTML_DOCUMENT_NODE))
11836 node = node->parent;
11837 if (node == NULL)
11838 return(XML_ERR_INTERNAL_ERROR);
11839 if (node->type == XML_ELEMENT_NODE)
11840 doc = node->doc;
11841 else
11842 doc = (xmlDocPtr) node;
11843 if (doc == NULL)
11844 return(XML_ERR_INTERNAL_ERROR);
11845
11846 /*
11847 * allocate a context and set-up everything not related to the
11848 * node position in the tree
11849 */
11850 if (doc->type == XML_DOCUMENT_NODE)
11851 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11852#ifdef LIBXML_HTML_ENABLED
11853 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11854 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11855#endif
11856 else
11857 return(XML_ERR_INTERNAL_ERROR);
11858
11859 if (ctxt == NULL)
11860 return(XML_ERR_NO_MEMORY);
11861 fake = xmlNewComment(NULL);
11862 if (fake == NULL) {
11863 xmlFreeParserCtxt(ctxt);
11864 return(XML_ERR_NO_MEMORY);
11865 }
11866 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011867
11868 /*
11869 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11870 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11871 * we must wait until the last moment to free the original one.
11872 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011873 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011874 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011875 xmlDictFree(ctxt->dict);
11876 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011877 } else
11878 options |= XML_PARSE_NODICT;
11879
11880 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011881 xmlDetectSAX2(ctxt);
11882 ctxt->myDoc = doc;
11883
11884 if (node->type == XML_ELEMENT_NODE) {
11885 nodePush(ctxt, node);
11886 /*
11887 * initialize the SAX2 namespaces stack
11888 */
11889 cur = node;
11890 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11891 xmlNsPtr ns = cur->nsDef;
11892 const xmlChar *iprefix, *ihref;
11893
11894 while (ns != NULL) {
11895 if (ctxt->dict) {
11896 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11897 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11898 } else {
11899 iprefix = ns->prefix;
11900 ihref = ns->href;
11901 }
11902
11903 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11904 nsPush(ctxt, iprefix, ihref);
11905 nsnr++;
11906 }
11907 ns = ns->next;
11908 }
11909 cur = cur->parent;
11910 }
11911 ctxt->instate = XML_PARSER_CONTENT;
11912 }
11913
11914 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11915 /*
11916 * ID/IDREF registration will be done in xmlValidateElement below
11917 */
11918 ctxt->loadsubset |= XML_SKIP_IDS;
11919 }
11920
Daniel Veillard499cc922006-01-18 17:22:35 +000011921#ifdef LIBXML_HTML_ENABLED
11922 if (doc->type == XML_HTML_DOCUMENT_NODE)
11923 __htmlParseContent(ctxt);
11924 else
11925#endif
11926 xmlParseContent(ctxt);
11927
Daniel Veillard29b17482004-08-16 00:39:03 +000011928 nsPop(ctxt, nsnr);
11929 if ((RAW == '<') && (NXT(1) == '/')) {
11930 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11931 } else if (RAW != 0) {
11932 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11933 }
11934 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11935 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11936 ctxt->wellFormed = 0;
11937 }
11938
11939 if (!ctxt->wellFormed) {
11940 if (ctxt->errNo == 0)
11941 ret = XML_ERR_INTERNAL_ERROR;
11942 else
11943 ret = (xmlParserErrors)ctxt->errNo;
11944 } else {
11945 ret = XML_ERR_OK;
11946 }
11947
11948 /*
11949 * Return the newly created nodeset after unlinking it from
11950 * the pseudo sibling.
11951 */
11952
11953 cur = fake->next;
11954 fake->next = NULL;
11955 node->last = fake;
11956
11957 if (cur != NULL) {
11958 cur->prev = NULL;
11959 }
11960
11961 *lst = cur;
11962
11963 while (cur != NULL) {
11964 cur->parent = NULL;
11965 cur = cur->next;
11966 }
11967
11968 xmlUnlinkNode(fake);
11969 xmlFreeNode(fake);
11970
11971
11972 if (ret != XML_ERR_OK) {
11973 xmlFreeNodeList(*lst);
11974 *lst = NULL;
11975 }
William M. Brackc3f81342004-10-03 01:22:44 +000011976
William M. Brackb7b54de2004-10-06 16:38:01 +000011977 if (doc->dict != NULL)
11978 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011979 xmlFreeParserCtxt(ctxt);
11980
11981 return(ret);
11982#else /* !SAX2 */
11983 return(XML_ERR_INTERNAL_ERROR);
11984#endif
11985}
11986
Daniel Veillard81273902003-09-30 00:43:48 +000011987#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011988/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011989 * xmlParseBalancedChunkMemoryRecover:
11990 * @doc: the document the chunk pertains to
11991 * @sax: the SAX handler bloc (possibly NULL)
11992 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11993 * @depth: Used for loop detection, use 0
11994 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11995 * @lst: the return value for the set of parsed nodes
11996 * @recover: return nodes even if the data is broken (use 0)
11997 *
11998 *
11999 * Parse a well-balanced chunk of an XML document
12000 * called by the parser
12001 * The allowed sequence for the Well Balanced Chunk is the one defined by
12002 * the content production in the XML grammar:
12003 *
12004 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12005 *
12006 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12007 * the parser error code otherwise
12008 *
12009 * In case recover is set to 1, the nodelist will not be empty even if
12010 * the parsed chunk is not well balanced.
12011 */
12012int
12013xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12014 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12015 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012016 xmlParserCtxtPtr ctxt;
12017 xmlDocPtr newDoc;
12018 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012019 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012020 int size;
12021 int ret = 0;
12022
12023 if (depth > 40) {
12024 return(XML_ERR_ENTITY_LOOP);
12025 }
12026
12027
Daniel Veillardcda96922001-08-21 10:56:31 +000012028 if (lst != NULL)
12029 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012030 if (string == NULL)
12031 return(-1);
12032
12033 size = xmlStrlen(string);
12034
12035 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12036 if (ctxt == NULL) return(-1);
12037 ctxt->userData = ctxt;
12038 if (sax != NULL) {
12039 oldsax = ctxt->sax;
12040 ctxt->sax = sax;
12041 if (user_data != NULL)
12042 ctxt->userData = user_data;
12043 }
12044 newDoc = xmlNewDoc(BAD_CAST "1.0");
12045 if (newDoc == NULL) {
12046 xmlFreeParserCtxt(ctxt);
12047 return(-1);
12048 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012049 if ((doc != NULL) && (doc->dict != NULL)) {
12050 xmlDictFree(ctxt->dict);
12051 ctxt->dict = doc->dict;
12052 xmlDictReference(ctxt->dict);
12053 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12054 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12055 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12056 ctxt->dictNames = 1;
12057 } else {
12058 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12059 }
Owen Taylor3473f882001-02-23 17:55:21 +000012060 if (doc != NULL) {
12061 newDoc->intSubset = doc->intSubset;
12062 newDoc->extSubset = doc->extSubset;
12063 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012064 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12065 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012066 if (sax != NULL)
12067 ctxt->sax = oldsax;
12068 xmlFreeParserCtxt(ctxt);
12069 newDoc->intSubset = NULL;
12070 newDoc->extSubset = NULL;
12071 xmlFreeDoc(newDoc);
12072 return(-1);
12073 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012074 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12075 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012076 if (doc == NULL) {
12077 ctxt->myDoc = newDoc;
12078 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012079 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012080 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012081 /* Ensure that doc has XML spec namespace */
12082 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12083 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012084 }
12085 ctxt->instate = XML_PARSER_CONTENT;
12086 ctxt->depth = depth;
12087
12088 /*
12089 * Doing validity checking on chunk doesn't make sense
12090 */
12091 ctxt->validate = 0;
12092 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012093 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012094
Daniel Veillardb39bc392002-10-26 19:29:51 +000012095 if ( doc != NULL ){
12096 content = doc->children;
12097 doc->children = NULL;
12098 xmlParseContent(ctxt);
12099 doc->children = content;
12100 }
12101 else {
12102 xmlParseContent(ctxt);
12103 }
Owen Taylor3473f882001-02-23 17:55:21 +000012104 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012105 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012106 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012107 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012108 }
12109 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012110 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012111 }
12112
12113 if (!ctxt->wellFormed) {
12114 if (ctxt->errNo == 0)
12115 ret = 1;
12116 else
12117 ret = ctxt->errNo;
12118 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012119 ret = 0;
12120 }
12121
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012122 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12123 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012124
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012125 /*
12126 * Return the newly created nodeset after unlinking it from
12127 * they pseudo parent.
12128 */
12129 cur = newDoc->children->children;
12130 *lst = cur;
12131 while (cur != NULL) {
12132 xmlSetTreeDoc(cur, doc);
12133 cur->parent = NULL;
12134 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012135 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012136 newDoc->children->children = NULL;
12137 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012138
Owen Taylor3473f882001-02-23 17:55:21 +000012139 if (sax != NULL)
12140 ctxt->sax = oldsax;
12141 xmlFreeParserCtxt(ctxt);
12142 newDoc->intSubset = NULL;
12143 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012144 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012145 xmlFreeDoc(newDoc);
12146
12147 return(ret);
12148}
12149
12150/**
12151 * xmlSAXParseEntity:
12152 * @sax: the SAX handler block
12153 * @filename: the filename
12154 *
12155 * parse an XML external entity out of context and build a tree.
12156 * It use the given SAX function block to handle the parsing callback.
12157 * If sax is NULL, fallback to the default DOM tree building routines.
12158 *
12159 * [78] extParsedEnt ::= TextDecl? content
12160 *
12161 * This correspond to a "Well Balanced" chunk
12162 *
12163 * Returns the resulting document tree
12164 */
12165
12166xmlDocPtr
12167xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12168 xmlDocPtr ret;
12169 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012170
12171 ctxt = xmlCreateFileParserCtxt(filename);
12172 if (ctxt == NULL) {
12173 return(NULL);
12174 }
12175 if (sax != NULL) {
12176 if (ctxt->sax != NULL)
12177 xmlFree(ctxt->sax);
12178 ctxt->sax = sax;
12179 ctxt->userData = NULL;
12180 }
12181
Owen Taylor3473f882001-02-23 17:55:21 +000012182 xmlParseExtParsedEnt(ctxt);
12183
12184 if (ctxt->wellFormed)
12185 ret = ctxt->myDoc;
12186 else {
12187 ret = NULL;
12188 xmlFreeDoc(ctxt->myDoc);
12189 ctxt->myDoc = NULL;
12190 }
12191 if (sax != NULL)
12192 ctxt->sax = NULL;
12193 xmlFreeParserCtxt(ctxt);
12194
12195 return(ret);
12196}
12197
12198/**
12199 * xmlParseEntity:
12200 * @filename: the filename
12201 *
12202 * parse an XML external entity out of context and build a tree.
12203 *
12204 * [78] extParsedEnt ::= TextDecl? content
12205 *
12206 * This correspond to a "Well Balanced" chunk
12207 *
12208 * Returns the resulting document tree
12209 */
12210
12211xmlDocPtr
12212xmlParseEntity(const char *filename) {
12213 return(xmlSAXParseEntity(NULL, filename));
12214}
Daniel Veillard81273902003-09-30 00:43:48 +000012215#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012216
12217/**
12218 * xmlCreateEntityParserCtxt:
12219 * @URL: the entity URL
12220 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012221 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012222 *
12223 * Create a parser context for an external entity
12224 * Automatic support for ZLIB/Compress compressed document is provided
12225 * by default if found at compile-time.
12226 *
12227 * Returns the new parser context or NULL
12228 */
12229xmlParserCtxtPtr
12230xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12231 const xmlChar *base) {
12232 xmlParserCtxtPtr ctxt;
12233 xmlParserInputPtr inputStream;
12234 char *directory = NULL;
12235 xmlChar *uri;
12236
12237 ctxt = xmlNewParserCtxt();
12238 if (ctxt == NULL) {
12239 return(NULL);
12240 }
12241
12242 uri = xmlBuildURI(URL, base);
12243
12244 if (uri == NULL) {
12245 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12246 if (inputStream == NULL) {
12247 xmlFreeParserCtxt(ctxt);
12248 return(NULL);
12249 }
12250
12251 inputPush(ctxt, inputStream);
12252
12253 if ((ctxt->directory == NULL) && (directory == NULL))
12254 directory = xmlParserGetDirectory((char *)URL);
12255 if ((ctxt->directory == NULL) && (directory != NULL))
12256 ctxt->directory = directory;
12257 } else {
12258 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12259 if (inputStream == NULL) {
12260 xmlFree(uri);
12261 xmlFreeParserCtxt(ctxt);
12262 return(NULL);
12263 }
12264
12265 inputPush(ctxt, inputStream);
12266
12267 if ((ctxt->directory == NULL) && (directory == NULL))
12268 directory = xmlParserGetDirectory((char *)uri);
12269 if ((ctxt->directory == NULL) && (directory != NULL))
12270 ctxt->directory = directory;
12271 xmlFree(uri);
12272 }
Owen Taylor3473f882001-02-23 17:55:21 +000012273 return(ctxt);
12274}
12275
12276/************************************************************************
12277 * *
12278 * Front ends when parsing from a file *
12279 * *
12280 ************************************************************************/
12281
12282/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012283 * xmlCreateURLParserCtxt:
12284 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012285 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012286 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012287 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012288 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012289 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012290 *
12291 * Returns the new parser context or NULL
12292 */
12293xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012294xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012295{
12296 xmlParserCtxtPtr ctxt;
12297 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012298 char *directory = NULL;
12299
Owen Taylor3473f882001-02-23 17:55:21 +000012300 ctxt = xmlNewParserCtxt();
12301 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012302 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012303 return(NULL);
12304 }
12305
Daniel Veillarddf292f72005-01-16 19:00:15 +000012306 if (options)
12307 xmlCtxtUseOptions(ctxt, options);
12308 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012309
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012310 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012311 if (inputStream == NULL) {
12312 xmlFreeParserCtxt(ctxt);
12313 return(NULL);
12314 }
12315
Owen Taylor3473f882001-02-23 17:55:21 +000012316 inputPush(ctxt, inputStream);
12317 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012318 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012319 if ((ctxt->directory == NULL) && (directory != NULL))
12320 ctxt->directory = directory;
12321
12322 return(ctxt);
12323}
12324
Daniel Veillard61b93382003-11-03 14:28:31 +000012325/**
12326 * xmlCreateFileParserCtxt:
12327 * @filename: the filename
12328 *
12329 * Create a parser context for a file content.
12330 * Automatic support for ZLIB/Compress compressed document is provided
12331 * by default if found at compile-time.
12332 *
12333 * Returns the new parser context or NULL
12334 */
12335xmlParserCtxtPtr
12336xmlCreateFileParserCtxt(const char *filename)
12337{
12338 return(xmlCreateURLParserCtxt(filename, 0));
12339}
12340
Daniel Veillard81273902003-09-30 00:43:48 +000012341#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012342/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012343 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012344 * @sax: the SAX handler block
12345 * @filename: the filename
12346 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12347 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012348 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012349 *
12350 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12351 * compressed document is provided by default if found at compile-time.
12352 * It use the given SAX function block to handle the parsing callback.
12353 * If sax is NULL, fallback to the default DOM tree building routines.
12354 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012355 * User data (void *) is stored within the parser context in the
12356 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012357 *
Owen Taylor3473f882001-02-23 17:55:21 +000012358 * Returns the resulting document tree
12359 */
12360
12361xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012362xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12363 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012364 xmlDocPtr ret;
12365 xmlParserCtxtPtr ctxt;
12366 char *directory = NULL;
12367
Daniel Veillard635ef722001-10-29 11:48:19 +000012368 xmlInitParser();
12369
Owen Taylor3473f882001-02-23 17:55:21 +000012370 ctxt = xmlCreateFileParserCtxt(filename);
12371 if (ctxt == NULL) {
12372 return(NULL);
12373 }
12374 if (sax != NULL) {
12375 if (ctxt->sax != NULL)
12376 xmlFree(ctxt->sax);
12377 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012378 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012379 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012380 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012381 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012382 }
Owen Taylor3473f882001-02-23 17:55:21 +000012383
12384 if ((ctxt->directory == NULL) && (directory == NULL))
12385 directory = xmlParserGetDirectory(filename);
12386 if ((ctxt->directory == NULL) && (directory != NULL))
12387 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12388
Daniel Veillarddad3f682002-11-17 16:47:27 +000012389 ctxt->recovery = recovery;
12390
Owen Taylor3473f882001-02-23 17:55:21 +000012391 xmlParseDocument(ctxt);
12392
William M. Brackc07329e2003-09-08 01:57:30 +000012393 if ((ctxt->wellFormed) || recovery) {
12394 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012395 if (ret != NULL) {
12396 if (ctxt->input->buf->compressed > 0)
12397 ret->compression = 9;
12398 else
12399 ret->compression = ctxt->input->buf->compressed;
12400 }
William M. Brackc07329e2003-09-08 01:57:30 +000012401 }
Owen Taylor3473f882001-02-23 17:55:21 +000012402 else {
12403 ret = NULL;
12404 xmlFreeDoc(ctxt->myDoc);
12405 ctxt->myDoc = NULL;
12406 }
12407 if (sax != NULL)
12408 ctxt->sax = NULL;
12409 xmlFreeParserCtxt(ctxt);
12410
12411 return(ret);
12412}
12413
12414/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012415 * xmlSAXParseFile:
12416 * @sax: the SAX handler block
12417 * @filename: the filename
12418 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12419 * documents
12420 *
12421 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12422 * compressed document is provided by default if found at compile-time.
12423 * It use the given SAX function block to handle the parsing callback.
12424 * If sax is NULL, fallback to the default DOM tree building routines.
12425 *
12426 * Returns the resulting document tree
12427 */
12428
12429xmlDocPtr
12430xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12431 int recovery) {
12432 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12433}
12434
12435/**
Owen Taylor3473f882001-02-23 17:55:21 +000012436 * xmlRecoverDoc:
12437 * @cur: a pointer to an array of xmlChar
12438 *
12439 * parse an XML in-memory document and build a tree.
12440 * In the case the document is not Well Formed, a tree is built anyway
12441 *
12442 * Returns the resulting document tree
12443 */
12444
12445xmlDocPtr
12446xmlRecoverDoc(xmlChar *cur) {
12447 return(xmlSAXParseDoc(NULL, cur, 1));
12448}
12449
12450/**
12451 * xmlParseFile:
12452 * @filename: the filename
12453 *
12454 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12455 * compressed document is provided by default if found at compile-time.
12456 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012457 * Returns the resulting document tree if the file was wellformed,
12458 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012459 */
12460
12461xmlDocPtr
12462xmlParseFile(const char *filename) {
12463 return(xmlSAXParseFile(NULL, filename, 0));
12464}
12465
12466/**
12467 * xmlRecoverFile:
12468 * @filename: the filename
12469 *
12470 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12471 * compressed document is provided by default if found at compile-time.
12472 * In the case the document is not Well Formed, a tree is built anyway
12473 *
12474 * Returns the resulting document tree
12475 */
12476
12477xmlDocPtr
12478xmlRecoverFile(const char *filename) {
12479 return(xmlSAXParseFile(NULL, filename, 1));
12480}
12481
12482
12483/**
12484 * xmlSetupParserForBuffer:
12485 * @ctxt: an XML parser context
12486 * @buffer: a xmlChar * buffer
12487 * @filename: a file name
12488 *
12489 * Setup the parser context to parse a new buffer; Clears any prior
12490 * contents from the parser context. The buffer parameter must not be
12491 * NULL, but the filename parameter can be
12492 */
12493void
12494xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12495 const char* filename)
12496{
12497 xmlParserInputPtr input;
12498
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012499 if ((ctxt == NULL) || (buffer == NULL))
12500 return;
12501
Owen Taylor3473f882001-02-23 17:55:21 +000012502 input = xmlNewInputStream(ctxt);
12503 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012504 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012505 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012506 return;
12507 }
12508
12509 xmlClearParserCtxt(ctxt);
12510 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012511 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012512 input->base = buffer;
12513 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012514 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012515 inputPush(ctxt, input);
12516}
12517
12518/**
12519 * xmlSAXUserParseFile:
12520 * @sax: a SAX handler
12521 * @user_data: The user data returned on SAX callbacks
12522 * @filename: a file name
12523 *
12524 * parse an XML file and call the given SAX handler routines.
12525 * Automatic support for ZLIB/Compress compressed document is provided
12526 *
12527 * Returns 0 in case of success or a error number otherwise
12528 */
12529int
12530xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12531 const char *filename) {
12532 int ret = 0;
12533 xmlParserCtxtPtr ctxt;
12534
12535 ctxt = xmlCreateFileParserCtxt(filename);
12536 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012537 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012538 xmlFree(ctxt->sax);
12539 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012540 xmlDetectSAX2(ctxt);
12541
Owen Taylor3473f882001-02-23 17:55:21 +000012542 if (user_data != NULL)
12543 ctxt->userData = user_data;
12544
12545 xmlParseDocument(ctxt);
12546
12547 if (ctxt->wellFormed)
12548 ret = 0;
12549 else {
12550 if (ctxt->errNo != 0)
12551 ret = ctxt->errNo;
12552 else
12553 ret = -1;
12554 }
12555 if (sax != NULL)
12556 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012557 if (ctxt->myDoc != NULL) {
12558 xmlFreeDoc(ctxt->myDoc);
12559 ctxt->myDoc = NULL;
12560 }
Owen Taylor3473f882001-02-23 17:55:21 +000012561 xmlFreeParserCtxt(ctxt);
12562
12563 return ret;
12564}
Daniel Veillard81273902003-09-30 00:43:48 +000012565#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012566
12567/************************************************************************
12568 * *
12569 * Front ends when parsing from memory *
12570 * *
12571 ************************************************************************/
12572
12573/**
12574 * xmlCreateMemoryParserCtxt:
12575 * @buffer: a pointer to a char array
12576 * @size: the size of the array
12577 *
12578 * Create a parser context for an XML in-memory document.
12579 *
12580 * Returns the new parser context or NULL
12581 */
12582xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012583xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012584 xmlParserCtxtPtr ctxt;
12585 xmlParserInputPtr input;
12586 xmlParserInputBufferPtr buf;
12587
12588 if (buffer == NULL)
12589 return(NULL);
12590 if (size <= 0)
12591 return(NULL);
12592
12593 ctxt = xmlNewParserCtxt();
12594 if (ctxt == NULL)
12595 return(NULL);
12596
Daniel Veillard53350552003-09-18 13:35:51 +000012597 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012598 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012599 if (buf == NULL) {
12600 xmlFreeParserCtxt(ctxt);
12601 return(NULL);
12602 }
Owen Taylor3473f882001-02-23 17:55:21 +000012603
12604 input = xmlNewInputStream(ctxt);
12605 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012606 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012607 xmlFreeParserCtxt(ctxt);
12608 return(NULL);
12609 }
12610
12611 input->filename = NULL;
12612 input->buf = buf;
12613 input->base = input->buf->buffer->content;
12614 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012615 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012616
12617 inputPush(ctxt, input);
12618 return(ctxt);
12619}
12620
Daniel Veillard81273902003-09-30 00:43:48 +000012621#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012622/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012623 * xmlSAXParseMemoryWithData:
12624 * @sax: the SAX handler block
12625 * @buffer: an pointer to a char array
12626 * @size: the size of the array
12627 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12628 * documents
12629 * @data: the userdata
12630 *
12631 * parse an XML in-memory block and use the given SAX function block
12632 * to handle the parsing callback. If sax is NULL, fallback to the default
12633 * DOM tree building routines.
12634 *
12635 * User data (void *) is stored within the parser context in the
12636 * context's _private member, so it is available nearly everywhere in libxml
12637 *
12638 * Returns the resulting document tree
12639 */
12640
12641xmlDocPtr
12642xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12643 int size, int recovery, void *data) {
12644 xmlDocPtr ret;
12645 xmlParserCtxtPtr ctxt;
12646
12647 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12648 if (ctxt == NULL) return(NULL);
12649 if (sax != NULL) {
12650 if (ctxt->sax != NULL)
12651 xmlFree(ctxt->sax);
12652 ctxt->sax = sax;
12653 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012654 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012655 if (data!=NULL) {
12656 ctxt->_private=data;
12657 }
12658
Daniel Veillardadba5f12003-04-04 16:09:01 +000012659 ctxt->recovery = recovery;
12660
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012661 xmlParseDocument(ctxt);
12662
12663 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12664 else {
12665 ret = NULL;
12666 xmlFreeDoc(ctxt->myDoc);
12667 ctxt->myDoc = NULL;
12668 }
12669 if (sax != NULL)
12670 ctxt->sax = NULL;
12671 xmlFreeParserCtxt(ctxt);
12672
12673 return(ret);
12674}
12675
12676/**
Owen Taylor3473f882001-02-23 17:55:21 +000012677 * xmlSAXParseMemory:
12678 * @sax: the SAX handler block
12679 * @buffer: an pointer to a char array
12680 * @size: the size of the array
12681 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12682 * documents
12683 *
12684 * parse an XML in-memory block and use the given SAX function block
12685 * to handle the parsing callback. If sax is NULL, fallback to the default
12686 * DOM tree building routines.
12687 *
12688 * Returns the resulting document tree
12689 */
12690xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012691xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12692 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012693 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012694}
12695
12696/**
12697 * xmlParseMemory:
12698 * @buffer: an pointer to a char array
12699 * @size: the size of the array
12700 *
12701 * parse an XML in-memory block and build a tree.
12702 *
12703 * Returns the resulting document tree
12704 */
12705
Daniel Veillard50822cb2001-07-26 20:05:51 +000012706xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012707 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12708}
12709
12710/**
12711 * xmlRecoverMemory:
12712 * @buffer: an pointer to a char array
12713 * @size: the size of the array
12714 *
12715 * parse an XML in-memory block and build a tree.
12716 * In the case the document is not Well Formed, a tree is built anyway
12717 *
12718 * Returns the resulting document tree
12719 */
12720
Daniel Veillard50822cb2001-07-26 20:05:51 +000012721xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012722 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12723}
12724
12725/**
12726 * xmlSAXUserParseMemory:
12727 * @sax: a SAX handler
12728 * @user_data: The user data returned on SAX callbacks
12729 * @buffer: an in-memory XML document input
12730 * @size: the length of the XML document in bytes
12731 *
12732 * A better SAX parsing routine.
12733 * parse an XML in-memory buffer and call the given SAX handler routines.
12734 *
12735 * Returns 0 in case of success or a error number otherwise
12736 */
12737int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012738 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012739 int ret = 0;
12740 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012741
12742 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12743 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012744 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12745 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000012746 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012747 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012748
Daniel Veillard30211a02001-04-26 09:33:18 +000012749 if (user_data != NULL)
12750 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012751
12752 xmlParseDocument(ctxt);
12753
12754 if (ctxt->wellFormed)
12755 ret = 0;
12756 else {
12757 if (ctxt->errNo != 0)
12758 ret = ctxt->errNo;
12759 else
12760 ret = -1;
12761 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012762 if (sax != NULL)
12763 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012764 if (ctxt->myDoc != NULL) {
12765 xmlFreeDoc(ctxt->myDoc);
12766 ctxt->myDoc = NULL;
12767 }
Owen Taylor3473f882001-02-23 17:55:21 +000012768 xmlFreeParserCtxt(ctxt);
12769
12770 return ret;
12771}
Daniel Veillard81273902003-09-30 00:43:48 +000012772#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012773
12774/**
12775 * xmlCreateDocParserCtxt:
12776 * @cur: a pointer to an array of xmlChar
12777 *
12778 * Creates a parser context for an XML in-memory document.
12779 *
12780 * Returns the new parser context or NULL
12781 */
12782xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012783xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012784 int len;
12785
12786 if (cur == NULL)
12787 return(NULL);
12788 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012789 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012790}
12791
Daniel Veillard81273902003-09-30 00:43:48 +000012792#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012793/**
12794 * xmlSAXParseDoc:
12795 * @sax: the SAX handler block
12796 * @cur: a pointer to an array of xmlChar
12797 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12798 * documents
12799 *
12800 * parse an XML in-memory document and build a tree.
12801 * It use the given SAX function block to handle the parsing callback.
12802 * If sax is NULL, fallback to the default DOM tree building routines.
12803 *
12804 * Returns the resulting document tree
12805 */
12806
12807xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012808xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012809 xmlDocPtr ret;
12810 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012811 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012812
Daniel Veillard38936062004-11-04 17:45:11 +000012813 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012814
12815
12816 ctxt = xmlCreateDocParserCtxt(cur);
12817 if (ctxt == NULL) return(NULL);
12818 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012819 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012820 ctxt->sax = sax;
12821 ctxt->userData = NULL;
12822 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012823 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012824
12825 xmlParseDocument(ctxt);
12826 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12827 else {
12828 ret = NULL;
12829 xmlFreeDoc(ctxt->myDoc);
12830 ctxt->myDoc = NULL;
12831 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012832 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012833 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012834 xmlFreeParserCtxt(ctxt);
12835
12836 return(ret);
12837}
12838
12839/**
12840 * xmlParseDoc:
12841 * @cur: a pointer to an array of xmlChar
12842 *
12843 * parse an XML in-memory document and build a tree.
12844 *
12845 * Returns the resulting document tree
12846 */
12847
12848xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012849xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012850 return(xmlSAXParseDoc(NULL, cur, 0));
12851}
Daniel Veillard81273902003-09-30 00:43:48 +000012852#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012853
Daniel Veillard81273902003-09-30 00:43:48 +000012854#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012855/************************************************************************
12856 * *
12857 * Specific function to keep track of entities references *
12858 * and used by the XSLT debugger *
12859 * *
12860 ************************************************************************/
12861
12862static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12863
12864/**
12865 * xmlAddEntityReference:
12866 * @ent : A valid entity
12867 * @firstNode : A valid first node for children of entity
12868 * @lastNode : A valid last node of children entity
12869 *
12870 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12871 */
12872static void
12873xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12874 xmlNodePtr lastNode)
12875{
12876 if (xmlEntityRefFunc != NULL) {
12877 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12878 }
12879}
12880
12881
12882/**
12883 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012884 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012885 *
12886 * Set the function to call call back when a xml reference has been made
12887 */
12888void
12889xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12890{
12891 xmlEntityRefFunc = func;
12892}
Daniel Veillard81273902003-09-30 00:43:48 +000012893#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012894
12895/************************************************************************
12896 * *
12897 * Miscellaneous *
12898 * *
12899 ************************************************************************/
12900
12901#ifdef LIBXML_XPATH_ENABLED
12902#include <libxml/xpath.h>
12903#endif
12904
Daniel Veillardffa3c742005-07-21 13:24:09 +000012905extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012906static int xmlParserInitialized = 0;
12907
12908/**
12909 * xmlInitParser:
12910 *
12911 * Initialization function for the XML parser.
12912 * This is not reentrant. Call once before processing in case of
12913 * use in multithreaded programs.
12914 */
12915
12916void
12917xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012918 if (xmlParserInitialized != 0)
12919 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012920
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012921#ifdef LIBXML_THREAD_ENABLED
12922 __xmlGlobalInitMutexLock();
12923 if (xmlParserInitialized == 0) {
12924#endif
12925 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12926 (xmlGenericError == NULL))
12927 initGenericErrorDefaultFunc(NULL);
12928 xmlInitGlobals();
12929 xmlInitThreads();
12930 xmlInitMemory();
12931 xmlInitCharEncodingHandlers();
12932 xmlDefaultSAXHandlerInit();
12933 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012934#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012935 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012936#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012937#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012938 htmlInitAutoClose();
12939 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012940#endif
12941#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012942 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012943#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012944 xmlParserInitialized = 1;
12945#ifdef LIBXML_THREAD_ENABLED
12946 }
12947 __xmlGlobalInitMutexUnlock();
12948#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012949}
12950
12951/**
12952 * xmlCleanupParser:
12953 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012954 * Cleanup function for the XML library. It tries to reclaim all
12955 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012956 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012957 * function should not prevent reusing the library but one should
12958 * call xmlCleanupParser() only when the process has
Daniel Veillardccc476f2008-03-04 13:19:49 +000012959 * finished using the library and all XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012960 */
12961
12962void
12963xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012964 if (!xmlParserInitialized)
12965 return;
12966
Owen Taylor3473f882001-02-23 17:55:21 +000012967 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012968#ifdef LIBXML_CATALOG_ENABLED
12969 xmlCatalogCleanup();
12970#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012971 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012972 xmlCleanupInputCallbacks();
12973#ifdef LIBXML_OUTPUT_ENABLED
12974 xmlCleanupOutputCallbacks();
12975#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012976#ifdef LIBXML_SCHEMAS_ENABLED
12977 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012978 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012979#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012980 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012981 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012982 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012983 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012984 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012985}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012986
12987/************************************************************************
12988 * *
12989 * New set (2.6.0) of simpler and more flexible APIs *
12990 * *
12991 ************************************************************************/
12992
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; when it is NULL every non-NULL string is freed.
 *
 * The expansion is a bare "if" statement that already ends with a
 * semicolon, so do not use it as the body of an if/else without braces.
 */
#define DICT_FREE(str)                                                  \
        if (((str) != NULL) && ((dict == NULL) ||                       \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));
13004
13005/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013006 * xmlCtxtReset:
13007 * @ctxt: an XML parser context
13008 *
13009 * Reset a parser context
13010 */
13011void
13012xmlCtxtReset(xmlParserCtxtPtr ctxt)
13013{
13014 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013015 xmlDictPtr dict;
13016
13017 if (ctxt == NULL)
13018 return;
13019
13020 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013021
13022 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13023 xmlFreeInputStream(input);
13024 }
13025 ctxt->inputNr = 0;
13026 ctxt->input = NULL;
13027
13028 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013029 if (ctxt->spaceTab != NULL) {
13030 ctxt->spaceTab[0] = -1;
13031 ctxt->space = &ctxt->spaceTab[0];
13032 } else {
13033 ctxt->space = NULL;
13034 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013035
13036
13037 ctxt->nodeNr = 0;
13038 ctxt->node = NULL;
13039
13040 ctxt->nameNr = 0;
13041 ctxt->name = NULL;
13042
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013043 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013044 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013045 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013046 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013047 DICT_FREE(ctxt->directory);
13048 ctxt->directory = NULL;
13049 DICT_FREE(ctxt->extSubURI);
13050 ctxt->extSubURI = NULL;
13051 DICT_FREE(ctxt->extSubSystem);
13052 ctxt->extSubSystem = NULL;
13053 if (ctxt->myDoc != NULL)
13054 xmlFreeDoc(ctxt->myDoc);
13055 ctxt->myDoc = NULL;
13056
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013057 ctxt->standalone = -1;
13058 ctxt->hasExternalSubset = 0;
13059 ctxt->hasPErefs = 0;
13060 ctxt->html = 0;
13061 ctxt->external = 0;
13062 ctxt->instate = XML_PARSER_START;
13063 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013064
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013065 ctxt->wellFormed = 1;
13066 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013067 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013068 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013069#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013070 ctxt->vctxt.userData = ctxt;
13071 ctxt->vctxt.error = xmlParserValidityError;
13072 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013073#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013074 ctxt->record_info = 0;
13075 ctxt->nbChars = 0;
13076 ctxt->checkIndex = 0;
13077 ctxt->inSubset = 0;
13078 ctxt->errNo = XML_ERR_OK;
13079 ctxt->depth = 0;
13080 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13081 ctxt->catalogs = NULL;
13082 xmlInitNodeInfoSeq(&ctxt->node_seq);
13083
13084 if (ctxt->attsDefault != NULL) {
13085 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13086 ctxt->attsDefault = NULL;
13087 }
13088 if (ctxt->attsSpecial != NULL) {
13089 xmlHashFree(ctxt->attsSpecial, NULL);
13090 ctxt->attsSpecial = NULL;
13091 }
13092
Daniel Veillard4432df22003-09-28 18:58:27 +000013093#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013094 if (ctxt->catalogs != NULL)
13095 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013096#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013097 if (ctxt->lastError.code != XML_ERR_OK)
13098 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013099}
13100
13101/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013102 * xmlCtxtResetPush:
13103 * @ctxt: an XML parser context
13104 * @chunk: a pointer to an array of chars
13105 * @size: number of chars in the array
13106 * @filename: an optional file name or URI
13107 * @encoding: the document encoding, or NULL
13108 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013109 * Reset a push parser context
13110 *
13111 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013112 */
13113int
13114xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13115 int size, const char *filename, const char *encoding)
13116{
13117 xmlParserInputPtr inputStream;
13118 xmlParserInputBufferPtr buf;
13119 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13120
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013121 if (ctxt == NULL)
13122 return(1);
13123
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013124 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13125 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13126
13127 buf = xmlAllocParserInputBuffer(enc);
13128 if (buf == NULL)
13129 return(1);
13130
13131 if (ctxt == NULL) {
13132 xmlFreeParserInputBuffer(buf);
13133 return(1);
13134 }
13135
13136 xmlCtxtReset(ctxt);
13137
13138 if (ctxt->pushTab == NULL) {
13139 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13140 sizeof(xmlChar *));
13141 if (ctxt->pushTab == NULL) {
13142 xmlErrMemory(ctxt, NULL);
13143 xmlFreeParserInputBuffer(buf);
13144 return(1);
13145 }
13146 }
13147
13148 if (filename == NULL) {
13149 ctxt->directory = NULL;
13150 } else {
13151 ctxt->directory = xmlParserGetDirectory(filename);
13152 }
13153
13154 inputStream = xmlNewInputStream(ctxt);
13155 if (inputStream == NULL) {
13156 xmlFreeParserInputBuffer(buf);
13157 return(1);
13158 }
13159
13160 if (filename == NULL)
13161 inputStream->filename = NULL;
13162 else
13163 inputStream->filename = (char *)
13164 xmlCanonicPath((const xmlChar *) filename);
13165 inputStream->buf = buf;
13166 inputStream->base = inputStream->buf->buffer->content;
13167 inputStream->cur = inputStream->buf->buffer->content;
13168 inputStream->end =
13169 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13170
13171 inputPush(ctxt, inputStream);
13172
13173 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13174 (ctxt->input->buf != NULL)) {
13175 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13176 int cur = ctxt->input->cur - ctxt->input->base;
13177
13178 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13179
13180 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13181 ctxt->input->cur = ctxt->input->base + cur;
13182 ctxt->input->end =
13183 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13184 use];
13185#ifdef DEBUG_PUSH
13186 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13187#endif
13188 }
13189
13190 if (encoding != NULL) {
13191 xmlCharEncodingHandlerPtr hdlr;
13192
13193 hdlr = xmlFindCharEncodingHandler(encoding);
13194 if (hdlr != NULL) {
13195 xmlSwitchToEncoding(ctxt, hdlr);
13196 } else {
13197 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13198 "Unsupported encoding %s\n", BAD_CAST encoding);
13199 }
13200 } else if (enc != XML_CHAR_ENCODING_NONE) {
13201 xmlSwitchEncoding(ctxt, enc);
13202 }
13203
13204 return(0);
13205}
13206
13207/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013208 * xmlCtxtUseOptions:
13209 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013210 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013211 *
13212 * Applies the options to the parser context
13213 *
13214 * Returns 0 in case of success, the set of unknown or unimplemented options
13215 * in case of error.
13216 */
13217int
13218xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13219{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013220 if (ctxt == NULL)
13221 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013222 if (options & XML_PARSE_RECOVER) {
13223 ctxt->recovery = 1;
13224 options -= XML_PARSE_RECOVER;
13225 } else
13226 ctxt->recovery = 0;
13227 if (options & XML_PARSE_DTDLOAD) {
13228 ctxt->loadsubset = XML_DETECT_IDS;
13229 options -= XML_PARSE_DTDLOAD;
13230 } else
13231 ctxt->loadsubset = 0;
13232 if (options & XML_PARSE_DTDATTR) {
13233 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13234 options -= XML_PARSE_DTDATTR;
13235 }
13236 if (options & XML_PARSE_NOENT) {
13237 ctxt->replaceEntities = 1;
13238 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13239 options -= XML_PARSE_NOENT;
13240 } else
13241 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013242 if (options & XML_PARSE_PEDANTIC) {
13243 ctxt->pedantic = 1;
13244 options -= XML_PARSE_PEDANTIC;
13245 } else
13246 ctxt->pedantic = 0;
13247 if (options & XML_PARSE_NOBLANKS) {
13248 ctxt->keepBlanks = 0;
13249 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13250 options -= XML_PARSE_NOBLANKS;
13251 } else
13252 ctxt->keepBlanks = 1;
13253 if (options & XML_PARSE_DTDVALID) {
13254 ctxt->validate = 1;
13255 if (options & XML_PARSE_NOWARNING)
13256 ctxt->vctxt.warning = NULL;
13257 if (options & XML_PARSE_NOERROR)
13258 ctxt->vctxt.error = NULL;
13259 options -= XML_PARSE_DTDVALID;
13260 } else
13261 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013262 if (options & XML_PARSE_NOWARNING) {
13263 ctxt->sax->warning = NULL;
13264 options -= XML_PARSE_NOWARNING;
13265 }
13266 if (options & XML_PARSE_NOERROR) {
13267 ctxt->sax->error = NULL;
13268 ctxt->sax->fatalError = NULL;
13269 options -= XML_PARSE_NOERROR;
13270 }
Daniel Veillard81273902003-09-30 00:43:48 +000013271#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013272 if (options & XML_PARSE_SAX1) {
13273 ctxt->sax->startElement = xmlSAX2StartElement;
13274 ctxt->sax->endElement = xmlSAX2EndElement;
13275 ctxt->sax->startElementNs = NULL;
13276 ctxt->sax->endElementNs = NULL;
13277 ctxt->sax->initialized = 1;
13278 options -= XML_PARSE_SAX1;
13279 }
Daniel Veillard81273902003-09-30 00:43:48 +000013280#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013281 if (options & XML_PARSE_NODICT) {
13282 ctxt->dictNames = 0;
13283 options -= XML_PARSE_NODICT;
13284 } else {
13285 ctxt->dictNames = 1;
13286 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013287 if (options & XML_PARSE_NOCDATA) {
13288 ctxt->sax->cdataBlock = NULL;
13289 options -= XML_PARSE_NOCDATA;
13290 }
13291 if (options & XML_PARSE_NSCLEAN) {
13292 ctxt->options |= XML_PARSE_NSCLEAN;
13293 options -= XML_PARSE_NSCLEAN;
13294 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013295 if (options & XML_PARSE_NONET) {
13296 ctxt->options |= XML_PARSE_NONET;
13297 options -= XML_PARSE_NONET;
13298 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013299 if (options & XML_PARSE_COMPACT) {
13300 ctxt->options |= XML_PARSE_COMPACT;
13301 options -= XML_PARSE_COMPACT;
13302 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013303 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013304 return (options);
13305}
13306
13307/**
13308 * xmlDoRead:
13309 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013310 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013311 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013312 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013313 * @reuse: keep the context for reuse
13314 *
13315 * Common front-end for the xmlRead functions
13316 *
13317 * Returns the resulting document tree or NULL
13318 */
13319static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013320xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13321 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013322{
13323 xmlDocPtr ret;
13324
13325 xmlCtxtUseOptions(ctxt, options);
13326 if (encoding != NULL) {
13327 xmlCharEncodingHandlerPtr hdlr;
13328
13329 hdlr = xmlFindCharEncodingHandler(encoding);
13330 if (hdlr != NULL)
13331 xmlSwitchToEncoding(ctxt, hdlr);
13332 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013333 if ((URL != NULL) && (ctxt->input != NULL) &&
13334 (ctxt->input->filename == NULL))
13335 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013336 xmlParseDocument(ctxt);
13337 if ((ctxt->wellFormed) || ctxt->recovery)
13338 ret = ctxt->myDoc;
13339 else {
13340 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013341 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013342 xmlFreeDoc(ctxt->myDoc);
13343 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013344 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013345 ctxt->myDoc = NULL;
13346 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013347 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013348 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013349
13350 return (ret);
13351}
13352
13353/**
13354 * xmlReadDoc:
13355 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013356 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013357 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013358 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013359 *
13360 * parse an XML in-memory document and build a tree.
13361 *
13362 * Returns the resulting document tree
13363 */
13364xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013365xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013366{
13367 xmlParserCtxtPtr ctxt;
13368
13369 if (cur == NULL)
13370 return (NULL);
13371
13372 ctxt = xmlCreateDocParserCtxt(cur);
13373 if (ctxt == NULL)
13374 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013375 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013376}
13377
13378/**
13379 * xmlReadFile:
13380 * @filename: a file or URL
13381 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013382 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013383 *
13384 * parse an XML file from the filesystem or the network.
13385 *
13386 * Returns the resulting document tree
13387 */
13388xmlDocPtr
13389xmlReadFile(const char *filename, const char *encoding, int options)
13390{
13391 xmlParserCtxtPtr ctxt;
13392
Daniel Veillard61b93382003-11-03 14:28:31 +000013393 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013394 if (ctxt == NULL)
13395 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013396 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013397}
13398
13399/**
13400 * xmlReadMemory:
13401 * @buffer: a pointer to a char array
13402 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013403 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013404 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013405 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013406 *
13407 * parse an XML in-memory document and build a tree.
13408 *
13409 * Returns the resulting document tree
13410 */
13411xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013412xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013413{
13414 xmlParserCtxtPtr ctxt;
13415
13416 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13417 if (ctxt == NULL)
13418 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013419 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013420}
13421
13422/**
13423 * xmlReadFd:
13424 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013425 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013426 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013427 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013428 *
13429 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013430 * NOTE that the file descriptor will not be closed when the
13431 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013432 *
13433 * Returns the resulting document tree
13434 */
13435xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013436xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013437{
13438 xmlParserCtxtPtr ctxt;
13439 xmlParserInputBufferPtr input;
13440 xmlParserInputPtr stream;
13441
13442 if (fd < 0)
13443 return (NULL);
13444
13445 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13446 if (input == NULL)
13447 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013448 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013449 ctxt = xmlNewParserCtxt();
13450 if (ctxt == NULL) {
13451 xmlFreeParserInputBuffer(input);
13452 return (NULL);
13453 }
13454 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13455 if (stream == NULL) {
13456 xmlFreeParserInputBuffer(input);
13457 xmlFreeParserCtxt(ctxt);
13458 return (NULL);
13459 }
13460 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013461 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013462}
13463
13464/**
13465 * xmlReadIO:
13466 * @ioread: an I/O read function
13467 * @ioclose: an I/O close function
13468 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013469 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013470 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013471 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013472 *
13473 * parse an XML document from I/O functions and source and build a tree.
13474 *
13475 * Returns the resulting document tree
13476 */
13477xmlDocPtr
13478xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013479 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013480{
13481 xmlParserCtxtPtr ctxt;
13482 xmlParserInputBufferPtr input;
13483 xmlParserInputPtr stream;
13484
13485 if (ioread == NULL)
13486 return (NULL);
13487
13488 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13489 XML_CHAR_ENCODING_NONE);
13490 if (input == NULL)
13491 return (NULL);
13492 ctxt = xmlNewParserCtxt();
13493 if (ctxt == NULL) {
13494 xmlFreeParserInputBuffer(input);
13495 return (NULL);
13496 }
13497 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13498 if (stream == NULL) {
13499 xmlFreeParserInputBuffer(input);
13500 xmlFreeParserCtxt(ctxt);
13501 return (NULL);
13502 }
13503 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013504 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013505}
13506
13507/**
13508 * xmlCtxtReadDoc:
13509 * @ctxt: an XML parser context
13510 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013511 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013512 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013513 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013514 *
13515 * parse an XML in-memory document and build a tree.
13516 * This reuses the existing @ctxt parser context
13517 *
13518 * Returns the resulting document tree
13519 */
13520xmlDocPtr
13521xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013522 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013523{
13524 xmlParserInputPtr stream;
13525
13526 if (cur == NULL)
13527 return (NULL);
13528 if (ctxt == NULL)
13529 return (NULL);
13530
13531 xmlCtxtReset(ctxt);
13532
13533 stream = xmlNewStringInputStream(ctxt, cur);
13534 if (stream == NULL) {
13535 return (NULL);
13536 }
13537 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013538 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013539}
13540
13541/**
13542 * xmlCtxtReadFile:
13543 * @ctxt: an XML parser context
13544 * @filename: a file or URL
13545 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013546 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013547 *
13548 * parse an XML file from the filesystem or the network.
13549 * This reuses the existing @ctxt parser context
13550 *
13551 * Returns the resulting document tree
13552 */
13553xmlDocPtr
13554xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13555 const char *encoding, int options)
13556{
13557 xmlParserInputPtr stream;
13558
13559 if (filename == NULL)
13560 return (NULL);
13561 if (ctxt == NULL)
13562 return (NULL);
13563
13564 xmlCtxtReset(ctxt);
13565
Daniel Veillard29614c72004-11-26 10:47:26 +000013566 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013567 if (stream == NULL) {
13568 return (NULL);
13569 }
13570 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013571 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013572}
13573
13574/**
13575 * xmlCtxtReadMemory:
13576 * @ctxt: an XML parser context
13577 * @buffer: a pointer to a char array
13578 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013579 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013580 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013581 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013582 *
13583 * parse an XML in-memory document and build a tree.
13584 * This reuses the existing @ctxt parser context
13585 *
13586 * Returns the resulting document tree
13587 */
13588xmlDocPtr
13589xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013590 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013591{
13592 xmlParserInputBufferPtr input;
13593 xmlParserInputPtr stream;
13594
13595 if (ctxt == NULL)
13596 return (NULL);
13597 if (buffer == NULL)
13598 return (NULL);
13599
13600 xmlCtxtReset(ctxt);
13601
13602 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13603 if (input == NULL) {
13604 return(NULL);
13605 }
13606
13607 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13608 if (stream == NULL) {
13609 xmlFreeParserInputBuffer(input);
13610 return(NULL);
13611 }
13612
13613 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013614 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013615}
13616
13617/**
13618 * xmlCtxtReadFd:
13619 * @ctxt: an XML parser context
13620 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013621 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013622 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013623 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013624 *
13625 * parse an XML from a file descriptor and build a tree.
13626 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013627 * NOTE that the file descriptor will not be closed when the
13628 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013629 *
13630 * Returns the resulting document tree
13631 */
13632xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013633xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13634 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013635{
13636 xmlParserInputBufferPtr input;
13637 xmlParserInputPtr stream;
13638
13639 if (fd < 0)
13640 return (NULL);
13641 if (ctxt == NULL)
13642 return (NULL);
13643
13644 xmlCtxtReset(ctxt);
13645
13646
13647 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13648 if (input == NULL)
13649 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013650 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013651 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13652 if (stream == NULL) {
13653 xmlFreeParserInputBuffer(input);
13654 return (NULL);
13655 }
13656 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013657 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013658}
13659
13660/**
13661 * xmlCtxtReadIO:
13662 * @ctxt: an XML parser context
13663 * @ioread: an I/O read function
13664 * @ioclose: an I/O close function
13665 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013666 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013667 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013668 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013669 *
13670 * parse an XML document from I/O functions and source and build a tree.
13671 * This reuses the existing @ctxt parser context
13672 *
13673 * Returns the resulting document tree
13674 */
13675xmlDocPtr
13676xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13677 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013678 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013679 const char *encoding, int options)
13680{
13681 xmlParserInputBufferPtr input;
13682 xmlParserInputPtr stream;
13683
13684 if (ioread == NULL)
13685 return (NULL);
13686 if (ctxt == NULL)
13687 return (NULL);
13688
13689 xmlCtxtReset(ctxt);
13690
13691 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13692 XML_CHAR_ENCODING_NONE);
13693 if (input == NULL)
13694 return (NULL);
13695 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13696 if (stream == NULL) {
13697 xmlFreeParserInputBuffer(input);
13698 return (NULL);
13699 }
13700 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013701 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013702}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013703
13704#define bottom_parser
13705#include "elfgcchack.h"