blob: acfc3d53ffdd75147d7fc9d7f873880b79cd64c4 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser itself but a
 * safety boundary; the initial value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Compile-time switch; defined to 1 in this file. */
#define SAX2 1

/* Size constants (300 and 100) used by the parser's buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
619/**
620 * xmlHasFeature:
621 * @feature: the feature to be examined
622 *
623 * Examines if the library has been compiled with a given feature.
624 *
625 * Returns a non-zero value if the feature exist, otherwise zero.
626 * Returns zero (0) if the feature does not exist or an unknown
627 * unknown feature is requested, non-zero otherwise.
628 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
825/************************************************************************
826 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000827 * SAX2 defaulted attributes handling *
828 * *
829 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
Daniel Veillard6a31b832008-03-26 14:06:44 +0000981 /*
982 * Allows to detect attribute redefinitions
983 */
984 if (ctxt->attsSpecial != NULL) {
985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
986 return;
987 }
988
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991 if (ctxt->attsDefault == NULL)
992 goto mem_error;
993 }
994
995 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000996 * split the element name into prefix:localname , the string found
997 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000998 */
999 name = xmlSplitQName3(fullname, &len);
1000 if (name == NULL) {
1001 name = xmlDictLookup(ctxt->dict, fullname, -1);
1002 prefix = NULL;
1003 } else {
1004 name = xmlDictLookup(ctxt->dict, name, -1);
1005 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1006 }
1007
1008 /*
1009 * make sure there is some storage
1010 */
1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012 if (defaults == NULL) {
1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001014 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015 if (defaults == NULL)
1016 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001018 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001019 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1020 defaults, NULL) < 0) {
1021 xmlFree(defaults);
1022 goto mem_error;
1023 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001025 xmlDefAttrsPtr temp;
1026
1027 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001029 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001030 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001031 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001033 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1034 defaults, NULL) < 0) {
1035 xmlFree(defaults);
1036 goto mem_error;
1037 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001038 }
1039
1040 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001041 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001042 * are within the DTD and hen not associated to namespace names.
1043 */
1044 name = xmlSplitQName3(fullattr, &len);
1045 if (name == NULL) {
1046 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1047 prefix = NULL;
1048 } else {
1049 name = xmlDictLookup(ctxt->dict, name, -1);
1050 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1051 }
1052
1053 defaults->values[4 * defaults->nbAttrs] = name;
1054 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1055 /* intern the string and precompute the end */
1056 len = xmlStrlen(value);
1057 value = xmlDictLookup(ctxt->dict, value, len);
1058 defaults->values[4 * defaults->nbAttrs + 2] = value;
1059 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1060 defaults->nbAttrs++;
1061
1062 return;
1063
1064mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001066 return;
1067}
1068
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001069/**
1070 * xmlAddSpecialAttr:
1071 * @ctxt: an XML parser context
1072 * @fullname: the element fullname
1073 * @fullattr: the attribute fullname
1074 * @type: the attribute type
1075 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001076 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001077 */
1078static void
1079xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1080 const xmlChar *fullname,
1081 const xmlChar *fullattr,
1082 int type)
1083{
1084 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001085 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001086 if (ctxt->attsSpecial == NULL)
1087 goto mem_error;
1088 }
1089
Daniel Veillardac4118d2008-01-11 05:27:32 +00001090 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1091 return;
1092
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001093 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1094 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001095 return;
1096
1097mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001098 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001099 return;
1100}
1101
Daniel Veillard4432df22003-09-28 18:58:27 +00001102/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001103 * xmlCleanSpecialAttrCallback:
1104 *
1105 * Removes CDATA attributes from the special attribute table
1106 */
1107static void
1108xmlCleanSpecialAttrCallback(void *payload, void *data,
1109 const xmlChar *fullname, const xmlChar *fullattr,
1110 const xmlChar *unused ATTRIBUTE_UNUSED) {
1111 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1112
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001113 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001114 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1115 }
1116}
1117
1118/**
1119 * xmlCleanSpecialAttr:
1120 * @ctxt: an XML parser context
1121 *
1122 * Trim the list of attributes defined to remove all those of type
1123 * CDATA as they are not special. This call should be done when finishing
1124 * to parse the DTD and before starting to parse the document root.
1125 */
1126static void
1127xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1128{
1129 if (ctxt->attsSpecial == NULL)
1130 return;
1131
1132 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1133
1134 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1135 xmlHashFree(ctxt->attsSpecial, NULL);
1136 ctxt->attsSpecial = NULL;
1137 }
1138 return;
1139}
1140
1141/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001142 * xmlCheckLanguageID:
1143 * @lang: pointer to the string value
1144 *
1145 * Checks that the value conforms to the LanguageID production:
1146 *
1147 * NOTE: this is somewhat deprecated, those productions were removed from
1148 * the XML Second edition.
1149 *
1150 * [33] LanguageID ::= Langcode ('-' Subcode)*
1151 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1152 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1153 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1154 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1155 * [38] Subcode ::= ([a-z] | [A-Z])+
1156 *
1157 * Returns 1 if correct 0 otherwise
1158 **/
1159int
1160xmlCheckLanguageID(const xmlChar * lang)
1161{
1162 const xmlChar *cur = lang;
1163
1164 if (cur == NULL)
1165 return (0);
1166 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1167 ((cur[0] == 'I') && (cur[1] == '-'))) {
1168 /*
1169 * IANA code
1170 */
1171 cur += 2;
1172 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1173 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1174 cur++;
1175 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1176 ((cur[0] == 'X') && (cur[1] == '-'))) {
1177 /*
1178 * User code
1179 */
1180 cur += 2;
1181 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1182 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1183 cur++;
1184 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1185 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1186 /*
1187 * ISO639
1188 */
1189 cur++;
1190 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1191 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1192 cur++;
1193 else
1194 return (0);
1195 } else
1196 return (0);
1197 while (cur[0] != 0) { /* non input consuming */
1198 if (cur[0] != '-')
1199 return (0);
1200 cur++;
1201 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1202 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1203 cur++;
1204 else
1205 return (0);
1206 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1207 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1208 cur++;
1209 }
1210 return (1);
1211}
1212
Owen Taylor3473f882001-02-23 17:55:21 +00001213/************************************************************************
1214 * *
1215 * Parser stacks related functions and macros *
1216 * *
1217 ************************************************************************/
1218
1219xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1220 const xmlChar ** str);
1221
Daniel Veillard0fb18932003-09-07 09:14:37 +00001222#ifdef SAX2
1223/**
1224 * nsPush:
1225 * @ctxt: an XML parser context
1226 * @prefix: the namespace prefix or NULL
1227 * @URL: the namespace name
1228 *
1229 * Pushes a new parser namespace on top of the ns stack
1230 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001231 * Returns -1 in case of error, -2 if the namespace should be discarded
1232 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001233 */
1234static int
1235nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1236{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001237 if (ctxt->options & XML_PARSE_NSCLEAN) {
1238 int i;
1239 for (i = 0;i < ctxt->nsNr;i += 2) {
1240 if (ctxt->nsTab[i] == prefix) {
1241 /* in scope */
1242 if (ctxt->nsTab[i + 1] == URL)
1243 return(-2);
1244 /* out of scope keep it */
1245 break;
1246 }
1247 }
1248 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001249 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1250 ctxt->nsMax = 10;
1251 ctxt->nsNr = 0;
1252 ctxt->nsTab = (const xmlChar **)
1253 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1254 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001256 ctxt->nsMax = 0;
1257 return (-1);
1258 }
1259 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001260 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001261 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001262 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1263 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1264 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001265 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001266 ctxt->nsMax /= 2;
1267 return (-1);
1268 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001269 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001270 }
1271 ctxt->nsTab[ctxt->nsNr++] = prefix;
1272 ctxt->nsTab[ctxt->nsNr++] = URL;
1273 return (ctxt->nsNr);
1274}
1275/**
1276 * nsPop:
1277 * @ctxt: an XML parser context
1278 * @nr: the number to pop
1279 *
1280 * Pops the top @nr parser prefix/namespace from the ns stack
1281 *
1282 * Returns the number of namespaces removed
1283 */
1284static int
1285nsPop(xmlParserCtxtPtr ctxt, int nr)
1286{
1287 int i;
1288
1289 if (ctxt->nsTab == NULL) return(0);
1290 if (ctxt->nsNr < nr) {
1291 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1292 nr = ctxt->nsNr;
1293 }
1294 if (ctxt->nsNr <= 0)
1295 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001296
Daniel Veillard0fb18932003-09-07 09:14:37 +00001297 for (i = 0;i < nr;i++) {
1298 ctxt->nsNr--;
1299 ctxt->nsTab[ctxt->nsNr] = NULL;
1300 }
1301 return(nr);
1302}
1303#endif
1304
1305static int
1306xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1307 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001309 int maxatts;
1310
1311 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001312 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001313 atts = (const xmlChar **)
1314 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001315 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001316 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001317 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1318 if (attallocs == NULL) goto mem_error;
1319 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001320 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001321 } else if (nr + 5 > ctxt->maxatts) {
1322 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001323 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1324 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001325 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001326 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001327 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1328 (maxatts / 5) * sizeof(int));
1329 if (attallocs == NULL) goto mem_error;
1330 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001331 ctxt->maxatts = maxatts;
1332 }
1333 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001334mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001335 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001336 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001337}
1338
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001339/**
1340 * inputPush:
1341 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001342 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001343 *
1344 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001345 *
1346 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001349inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1350{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001351 if ((ctxt == NULL) || (value == NULL))
1352 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->inputNr >= ctxt->inputMax) {
1354 ctxt->inputMax *= 2;
1355 ctxt->inputTab =
1356 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1357 ctxt->inputMax *
1358 sizeof(ctxt->inputTab[0]));
1359 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001360 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001361 return (0);
1362 }
1363 }
1364 ctxt->inputTab[ctxt->inputNr] = value;
1365 ctxt->input = value;
1366 return (ctxt->inputNr++);
1367}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001368/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001369 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001370 * @ctxt: an XML parser context
1371 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001373 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001374 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001375 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001376xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001377inputPop(xmlParserCtxtPtr ctxt)
1378{
1379 xmlParserInputPtr ret;
1380
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001381 if (ctxt == NULL)
1382 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001384 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 ctxt->inputNr--;
1386 if (ctxt->inputNr > 0)
1387 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1388 else
1389 ctxt->input = NULL;
1390 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001391 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001392 return (ret);
1393}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001394/**
1395 * nodePush:
1396 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001397 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001398 *
1399 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001400 *
1401 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001402 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001403int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001404nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1405{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001406 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001407 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001408 xmlNodePtr *tmp;
1409
1410 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1411 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001412 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001413 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001414 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001415 return (0);
1416 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001417 ctxt->nodeTab = tmp;
1418 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001420 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001421 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001422 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1423 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001424 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001425 return(0);
1426 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001427 ctxt->nodeTab[ctxt->nodeNr] = value;
1428 ctxt->node = value;
1429 return (ctxt->nodeNr++);
1430}
1431/**
1432 * nodePop:
1433 * @ctxt: an XML parser context
1434 *
1435 * Pops the top element node from the node stack
1436 *
1437 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001438 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001439xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001440nodePop(xmlParserCtxtPtr ctxt)
1441{
1442 xmlNodePtr ret;
1443
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001444 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001446 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001447 ctxt->nodeNr--;
1448 if (ctxt->nodeNr > 0)
1449 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1450 else
1451 ctxt->node = NULL;
1452 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001453 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001454 return (ret);
1455}
Daniel Veillarda2351322004-06-27 12:08:10 +00001456
1457#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001458/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001459 * nameNsPush:
1460 * @ctxt: an XML parser context
1461 * @value: the element name
1462 * @prefix: the element prefix
1463 * @URI: the element namespace name
1464 *
1465 * Pushes a new element name/prefix/URL on top of the name stack
1466 *
1467 * Returns -1 in case of error, the index in the stack otherwise
1468 */
1469static int
1470nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1471 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1472{
1473 if (ctxt->nameNr >= ctxt->nameMax) {
1474 const xmlChar * *tmp;
1475 void **tmp2;
1476 ctxt->nameMax *= 2;
1477 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1478 ctxt->nameMax *
1479 sizeof(ctxt->nameTab[0]));
1480 if (tmp == NULL) {
1481 ctxt->nameMax /= 2;
1482 goto mem_error;
1483 }
1484 ctxt->nameTab = tmp;
1485 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1486 ctxt->nameMax * 3 *
1487 sizeof(ctxt->pushTab[0]));
1488 if (tmp2 == NULL) {
1489 ctxt->nameMax /= 2;
1490 goto mem_error;
1491 }
1492 ctxt->pushTab = tmp2;
1493 }
1494 ctxt->nameTab[ctxt->nameNr] = value;
1495 ctxt->name = value;
1496 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1497 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001498 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001499 return (ctxt->nameNr++);
1500mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001501 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001502 return (-1);
1503}
1504/**
1505 * nameNsPop:
1506 * @ctxt: an XML parser context
1507 *
1508 * Pops the top element/prefix/URI name from the name stack
1509 *
1510 * Returns the name just removed
1511 */
1512static const xmlChar *
1513nameNsPop(xmlParserCtxtPtr ctxt)
1514{
1515 const xmlChar *ret;
1516
1517 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001518 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001519 ctxt->nameNr--;
1520 if (ctxt->nameNr > 0)
1521 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1522 else
1523 ctxt->name = NULL;
1524 ret = ctxt->nameTab[ctxt->nameNr];
1525 ctxt->nameTab[ctxt->nameNr] = NULL;
1526 return (ret);
1527}
Daniel Veillarda2351322004-06-27 12:08:10 +00001528#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001529
1530/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001531 * namePush:
1532 * @ctxt: an XML parser context
1533 * @value: the element name
1534 *
1535 * Pushes a new element name on top of the name stack
1536 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001537 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001539int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001540namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001542 if (ctxt == NULL) return (-1);
1543
Daniel Veillard1c732d22002-11-30 11:22:59 +00001544 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001545 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001546 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001547 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 ctxt->nameMax *
1549 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001550 if (tmp == NULL) {
1551 ctxt->nameMax /= 2;
1552 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001554 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001555 }
1556 ctxt->nameTab[ctxt->nameNr] = value;
1557 ctxt->name = value;
1558 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001559mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001560 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001561 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562}
1563/**
1564 * namePop:
1565 * @ctxt: an XML parser context
1566 *
1567 * Pops the top element name from the name stack
1568 *
1569 * Returns the name just removed
1570 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001571const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001572namePop(xmlParserCtxtPtr ctxt)
1573{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001574 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001575
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001576 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nameNr--;
1579 if (ctxt->nameNr > 0)
1580 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1581 else
1582 ctxt->name = NULL;
1583 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Owen Taylor3473f882001-02-23 17:55:21 +00001587
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001588static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001589 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001590 int *tmp;
1591
Owen Taylor3473f882001-02-23 17:55:21 +00001592 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001593 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1594 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1595 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001596 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001597 return(0);
1598 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001599 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 ctxt->spaceTab[ctxt->spaceNr] = val;
1602 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1603 return(ctxt->spaceNr++);
1604}
1605
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001606static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001607 int ret;
1608 if (ctxt->spaceNr <= 0) return(0);
1609 ctxt->spaceNr--;
1610 if (ctxt->spaceNr > 0)
1611 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1612 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001613 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001614 ret = ctxt->spaceTab[ctxt->spaceNr];
1615 ctxt->spaceTab[ctxt->spaceNr] = -1;
1616 return(ret);
1617}
1618
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1653
/*
 * Direct accessors on the current input of the local variable `ctxt`.
 * RAW and CUR both read the byte at the current position, NXT(val) reads
 * the byte val positions further, CUR_PTR is the current position itself.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as unsigned
 * chars, are equal to c1..cn. Evaluation stops at the first mismatch.
 * s is parenthesized before the cast so that an expression argument
 * (pointer arithmetic, conditional, ...) is converted as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1676
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * character counter and to the column. Then hand over to the PE reference
 * handler if the new current byte is '%', and pop the input if it is
 * exhausted and cannot be grown.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * newlines update the line counter and reset the column.
 * val is parenthesized in the loop bound so that an expression argument is
 * compared as a whole.
 */
#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1699
Daniel Veillarda880b122003-04-21 21:36:41 +00001700#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001701 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1702 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001703 xmlSHRINK (ctxt);
1704
1705static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1706 xmlParserInputShrink(ctxt->input);
1707 if ((*ctxt->input->cur == 0) &&
1708 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1709 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001710 }
Owen Taylor3473f882001-02-23 17:55:21 +00001711
Daniel Veillarda880b122003-04-21 21:36:41 +00001712#define GROW if ((ctxt->progressive == 0) && \
1713 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001714 xmlGROW (ctxt);
1715
1716static void xmlGROW (xmlParserCtxtPtr ctxt) {
1717 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1718 if ((*ctxt->input->cur == 0) &&
1719 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1720 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001721}
Owen Taylor3473f882001-02-23 17:55:21 +00001722
/* Skip the blanks at the current position of ctxt's input. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance ctxt's input by one character. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting it as one column and one
 * character, and try to grow the input if the new current byte is 0.
 * Expands to a brace block, not a do/while, so it needs no semicolon.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating line/column, then hand over to the PE reference handler if the
 * new current byte is '%'.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* l must be an lvalue: its address is passed to receive a length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v in buffer b at index i and advance
 * i; a length l of 1 takes the single byte shortcut. Arguments are
 * parenthesized so that expression arguments are cast and indexed as a
 * whole. Expands to an unbraced if/else statement.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001749
1750/**
1751 * xmlSkipBlankChars:
1752 * @ctxt: the XML parser context
1753 *
1754 * skip all blanks character found at that point in the input streams.
1755 * It pops up finished entities in the process if allowable at that point.
1756 *
1757 * Returns the number of space chars skipped
1758 */
1759
1760int
1761xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001762 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001763
1764 /*
1765 * It's Okay to use CUR/NEXT here since all the blanks are on
1766 * the ASCII range.
1767 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001768 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1769 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001770 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001771 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001772 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001773 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001774 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001775 if (*cur == '\n') {
1776 ctxt->input->line++; ctxt->input->col = 1;
1777 }
1778 cur++;
1779 res++;
1780 if (*cur == 0) {
1781 ctxt->input->cur = cur;
1782 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1783 cur = ctxt->input->cur;
1784 }
1785 }
1786 ctxt->input->cur = cur;
1787 } else {
1788 int cur;
1789 do {
1790 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001791 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001792 NEXT;
1793 cur = CUR;
1794 res++;
1795 }
1796 while ((cur == 0) && (ctxt->inputNr > 1) &&
1797 (ctxt->instate != XML_PARSER_COMMENT)) {
1798 xmlPopInput(ctxt);
1799 cur = CUR;
1800 }
1801 /*
1802 * Need to handle support of entities branching here
1803 */
1804 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1805 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1806 }
Owen Taylor3473f882001-02-23 17:55:21 +00001807 return(res);
1808}
1809
1810/************************************************************************
1811 * *
1812 * Commodity functions to handle entities *
1813 * *
1814 ************************************************************************/
1815
1816/**
1817 * xmlPopInput:
1818 * @ctxt: an XML parser context
1819 *
1820 * xmlPopInput: the current input pointed by ctxt->input came to an end
1821 * pop it and return the next char.
1822 *
1823 * Returns the current xmlChar in the parser context
1824 */
1825xmlChar
1826xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001827 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (xmlParserDebugEntities)
1829 xmlGenericError(xmlGenericErrorContext,
1830 "Popping input %d\n", ctxt->inputNr);
1831 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001832 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001833 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1834 return(xmlPopInput(ctxt));
1835 return(CUR);
1836}
1837
1838/**
1839 * xmlPushInput:
1840 * @ctxt: an XML parser context
1841 * @input: an XML parser input fragment (entity, XML fragment ...).
1842 *
1843 * xmlPushInput: switch to a new input stream which is stacked on top
1844 * of the previous one(s).
1845 */
1846void
1847xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1848 if (input == NULL) return;
1849
1850 if (xmlParserDebugEntities) {
1851 if ((ctxt->input != NULL) && (ctxt->input->filename))
1852 xmlGenericError(xmlGenericErrorContext,
1853 "%s(%d): ", ctxt->input->filename,
1854 ctxt->input->line);
1855 xmlGenericError(xmlGenericErrorContext,
1856 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1857 }
1858 inputPush(ctxt, input);
1859 GROW;
1860}
1861
1862/**
1863 * xmlParseCharRef:
1864 * @ctxt: an XML parser context
1865 *
1866 * parse Reference declarations
1867 *
1868 * [66] CharRef ::= '&#' [0-9]+ ';' |
1869 * '&#x' [0-9a-fA-F]+ ';'
1870 *
1871 * [ WFC: Legal Character ]
1872 * Characters referred to using character references must match the
1873 * production for Char.
1874 *
1875 * Returns the value parsed (as an int), 0 in case of error
1876 */
1877int
1878xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001879 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001881 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001882
Owen Taylor3473f882001-02-23 17:55:21 +00001883 /*
1884 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1885 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001886 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001887 (NXT(2) == 'x')) {
1888 SKIP(3);
1889 GROW;
1890 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001891 if (count++ > 20) {
1892 count = 0;
1893 GROW;
1894 }
1895 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001896 val = val * 16 + (CUR - '0');
1897 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1898 val = val * 16 + (CUR - 'a') + 10;
1899 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1900 val = val * 16 + (CUR - 'A') + 10;
1901 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001902 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 val = 0;
1904 break;
1905 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001906 if (val > 0x10FFFF)
1907 outofrange = val;
1908
Owen Taylor3473f882001-02-23 17:55:21 +00001909 NEXT;
1910 count++;
1911 }
1912 if (RAW == ';') {
1913 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001914 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001915 ctxt->nbChars ++;
1916 ctxt->input->cur++;
1917 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001918 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001919 SKIP(2);
1920 GROW;
1921 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001922 if (count++ > 20) {
1923 count = 0;
1924 GROW;
1925 }
1926 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001927 val = val * 10 + (CUR - '0');
1928 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001929 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001930 val = 0;
1931 break;
1932 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001933 if (val > 0x10FFFF)
1934 outofrange = val;
1935
Owen Taylor3473f882001-02-23 17:55:21 +00001936 NEXT;
1937 count++;
1938 }
1939 if (RAW == ';') {
1940 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001941 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001942 ctxt->nbChars ++;
1943 ctxt->input->cur++;
1944 }
1945 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 }
1948
1949 /*
1950 * [ WFC: Legal Character ]
1951 * Characters referred to using character references must match the
1952 * production for Char.
1953 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001954 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001955 return(val);
1956 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001957 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1958 "xmlParseCharRef: invalid xmlChar value %d\n",
1959 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 }
1961 return(0);
1962}
1963
1964/**
1965 * xmlParseStringCharRef:
1966 * @ctxt: an XML parser context
1967 * @str: a pointer to an index in the string
1968 *
1969 * parse Reference declarations, variant parsing from a string rather
1970 * than an an input flow.
1971 *
1972 * [66] CharRef ::= '&#' [0-9]+ ';' |
1973 * '&#x' [0-9a-fA-F]+ ';'
1974 *
1975 * [ WFC: Legal Character ]
1976 * Characters referred to using character references must match the
1977 * production for Char.
1978 *
1979 * Returns the value parsed (as an int), 0 in case of error, str will be
1980 * updated to the current value of the index
1981 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001982static int
Owen Taylor3473f882001-02-23 17:55:21 +00001983xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1984 const xmlChar *ptr;
1985 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001986 unsigned int val = 0;
1987 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001988
1989 if ((str == NULL) || (*str == NULL)) return(0);
1990 ptr = *str;
1991 cur = *ptr;
1992 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1993 ptr += 3;
1994 cur = *ptr;
1995 while (cur != ';') { /* Non input consuming loop */
1996 if ((cur >= '0') && (cur <= '9'))
1997 val = val * 16 + (cur - '0');
1998 else if ((cur >= 'a') && (cur <= 'f'))
1999 val = val * 16 + (cur - 'a') + 10;
2000 else if ((cur >= 'A') && (cur <= 'F'))
2001 val = val * 16 + (cur - 'A') + 10;
2002 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002003 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002004 val = 0;
2005 break;
2006 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002007 if (val > 0x10FFFF)
2008 outofrange = val;
2009
Owen Taylor3473f882001-02-23 17:55:21 +00002010 ptr++;
2011 cur = *ptr;
2012 }
2013 if (cur == ';')
2014 ptr++;
2015 } else if ((cur == '&') && (ptr[1] == '#')){
2016 ptr += 2;
2017 cur = *ptr;
2018 while (cur != ';') { /* Non input consuming loops */
2019 if ((cur >= '0') && (cur <= '9'))
2020 val = val * 10 + (cur - '0');
2021 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002022 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002023 val = 0;
2024 break;
2025 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002026 if (val > 0x10FFFF)
2027 outofrange = val;
2028
Owen Taylor3473f882001-02-23 17:55:21 +00002029 ptr++;
2030 cur = *ptr;
2031 }
2032 if (cur == ';')
2033 ptr++;
2034 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002035 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002036 return(0);
2037 }
2038 *str = ptr;
2039
2040 /*
2041 * [ WFC: Legal Character ]
2042 * Characters referred to using character references must match the
2043 * production for Char.
2044 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002045 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002046 return(val);
2047 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002048 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2049 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2050 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002051 }
2052 return(0);
2053}
2054
2055/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002056 * xmlNewBlanksWrapperInputStream:
2057 * @ctxt: an XML parser context
2058 * @entity: an Entity pointer
2059 *
2060 * Create a new input stream for wrapping
2061 * blanks around a PEReference
2062 *
2063 * Returns the new input stream or NULL
2064 */
2065
2066static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2067
Daniel Veillardf4862f02002-09-10 11:13:43 +00002068static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002069xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2070 xmlParserInputPtr input;
2071 xmlChar *buffer;
2072 size_t length;
2073 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2075 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002076 return(NULL);
2077 }
2078 if (xmlParserDebugEntities)
2079 xmlGenericError(xmlGenericErrorContext,
2080 "new blanks wrapper for entity: %s\n", entity->name);
2081 input = xmlNewInputStream(ctxt);
2082 if (input == NULL) {
2083 return(NULL);
2084 }
2085 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002086 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002087 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002088 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002089 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002090 return(NULL);
2091 }
2092 buffer [0] = ' ';
2093 buffer [1] = '%';
2094 buffer [length-3] = ';';
2095 buffer [length-2] = ' ';
2096 buffer [length-1] = 0;
2097 memcpy(buffer + 2, entity->name, length - 5);
2098 input->free = deallocblankswrapper;
2099 input->base = buffer;
2100 input->cur = buffer;
2101 input->length = length;
2102 input->end = &buffer[length];
2103 return(input);
2104}
2105
2106/**
Owen Taylor3473f882001-02-23 17:55:21 +00002107 * xmlParserHandlePEReference:
2108 * @ctxt: the parser context
2109 *
2110 * [69] PEReference ::= '%' Name ';'
2111 *
2112 * [ WFC: No Recursion ]
2113 * A parsed entity must not contain a recursive
2114 * reference to itself, either directly or indirectly.
2115 *
2116 * [ WFC: Entity Declared ]
2117 * In a document without any DTD, a document with only an internal DTD
2118 * subset which contains no parameter entity references, or a document
2119 * with "standalone='yes'", ... ... The declaration of a parameter
2120 * entity must precede any reference to it...
2121 *
2122 * [ VC: Entity Declared ]
2123 * In a document with an external subset or external parameter entities
2124 * with "standalone='no'", ... ... The declaration of a parameter entity
2125 * must precede any reference to it...
2126 *
2127 * [ WFC: In DTD ]
2128 * Parameter-entity references may only appear in the DTD.
2129 * NOTE: misleading but this is handled.
2130 *
2131 * A PEReference may have been detected in the current input stream
2132 * the handling is done accordingly to
2133 * http://www.w3.org/TR/REC-xml#entproc
2134 * i.e.
2135 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002136 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002137 */
2138void
2139xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002140 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002141 xmlEntityPtr entity = NULL;
2142 xmlParserInputPtr input;
2143
Owen Taylor3473f882001-02-23 17:55:21 +00002144 if (RAW != '%') return;
2145 switch(ctxt->instate) {
2146 case XML_PARSER_CDATA_SECTION:
2147 return;
2148 case XML_PARSER_COMMENT:
2149 return;
2150 case XML_PARSER_START_TAG:
2151 return;
2152 case XML_PARSER_END_TAG:
2153 return;
2154 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002155 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002156 return;
2157 case XML_PARSER_PROLOG:
2158 case XML_PARSER_START:
2159 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002160 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002161 return;
2162 case XML_PARSER_ENTITY_DECL:
2163 case XML_PARSER_CONTENT:
2164 case XML_PARSER_ATTRIBUTE_VALUE:
2165 case XML_PARSER_PI:
2166 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002167 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002168 /* we just ignore it there */
2169 return;
2170 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002171 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002172 return;
2173 case XML_PARSER_ENTITY_VALUE:
2174 /*
2175 * NOTE: in the case of entity values, we don't do the
2176 * substitution here since we need the literal
2177 * entity value to be able to save the internal
2178 * subset of the document.
2179 * This will be handled by xmlStringDecodeEntities
2180 */
2181 return;
2182 case XML_PARSER_DTD:
2183 /*
2184 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2185 * In the internal DTD subset, parameter-entity references
2186 * can occur only where markup declarations can occur, not
2187 * within markup declarations.
2188 * In that case this is handled in xmlParseMarkupDecl
2189 */
2190 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2191 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002192 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002194 break;
2195 case XML_PARSER_IGNORE:
2196 return;
2197 }
2198
2199 NEXT;
2200 name = xmlParseName(ctxt);
2201 if (xmlParserDebugEntities)
2202 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002203 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002204 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002205 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002206 } else {
2207 if (RAW == ';') {
2208 NEXT;
2209 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2210 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2211 if (entity == NULL) {
2212
2213 /*
2214 * [ WFC: Entity Declared ]
2215 * In a document without any DTD, a document with only an
2216 * internal DTD subset which contains no parameter entity
2217 * references, or a document with "standalone='yes'", ...
2218 * ... The declaration of a parameter entity must precede
2219 * any reference to it...
2220 */
2221 if ((ctxt->standalone == 1) ||
2222 ((ctxt->hasExternalSubset == 0) &&
2223 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002225 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002226 } else {
2227 /*
2228 * [ VC: Entity Declared ]
2229 * In a document with an external subset or external
2230 * parameter entities with "standalone='no'", ...
2231 * ... The declaration of a parameter entity must precede
2232 * any reference to it...
2233 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002234 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2235 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2236 "PEReference: %%%s; not found\n",
2237 name);
2238 } else
2239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2240 "PEReference: %%%s; not found\n",
2241 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 ctxt->valid = 0;
2243 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002244 } else if (ctxt->input->free != deallocblankswrapper) {
2245 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2246 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else {
2248 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2249 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002250 xmlChar start[4];
2251 xmlCharEncoding enc;
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 /*
2254 * handle the extra spaces added before and after
2255 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002256 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002257 */
2258 input = xmlNewEntityInputStream(ctxt, entity);
2259 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002260
2261 /*
2262 * Get the 4 first bytes and decode the charset
2263 * if enc != XML_CHAR_ENCODING_NONE
2264 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002265 * Note that, since we may have some non-UTF8
2266 * encoding (like UTF16, bug 135229), the 'length'
2267 * is not known, but we can calculate based upon
2268 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002269 */
2270 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002271 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002272 start[0] = RAW;
2273 start[1] = NXT(1);
2274 start[2] = NXT(2);
2275 start[3] = NXT(3);
2276 enc = xmlDetectCharEncoding(start, 4);
2277 if (enc != XML_CHAR_ENCODING_NONE) {
2278 xmlSwitchEncoding(ctxt, enc);
2279 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002280 }
2281
Owen Taylor3473f882001-02-23 17:55:21 +00002282 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002283 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2284 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002285 xmlParseTextDecl(ctxt);
2286 }
Owen Taylor3473f882001-02-23 17:55:21 +00002287 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002288 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2289 "PEReference: %s is not a parameter entity\n",
2290 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
2292 }
2293 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002294 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002295 }
Owen Taylor3473f882001-02-23 17:55:21 +00002296 }
2297}
2298
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the integer variable `<buffer>_size` and reallocates @buffer to
 * that many xmlChar with xmlRealloc().
 *
 * Requirements at the expansion site:
 *   - @buffer is an lvalue of type xmlChar *, and a variable whose name is
 *     the buffer name followed by `_size` is in scope;
 *   - a label named `mem_error` exists in the enclosing function; it is
 *     jumped to when xmlRealloc() returns NULL.
 *
 * On failure @buffer is left pointing at the old allocation (only the size
 * variable has already been doubled), so it can still be released at
 * mem_error.
 * One expansion doubles the capacity exactly once; a caller that may need
 * more room than that has to expand the macro repeatedly.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2310
2311/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002312 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002313 * @ctxt: the parser context
2314 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002315 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002316 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2317 * @end: an end marker xmlChar, 0 if none
2318 * @end2: an end marker xmlChar, 0 if none
2319 * @end3: an end marker xmlChar, 0 if none
2320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002321 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002322 *
2323 * [67] Reference ::= EntityRef | CharRef
2324 *
2325 * [69] PEReference ::= '%' Name ';'
2326 *
2327 * Returns A newly allocated string with the substitution done. The caller
2328 * must deallocate it !
2329 */
2330xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002331xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2332 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002333 xmlChar *buffer = NULL;
2334 int buffer_size = 0;
2335
2336 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002337 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002338 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 xmlEntityPtr ent;
2340 int c,l;
2341 int nbchars = 0;
2342
Daniel Veillarda82b1822004-11-08 16:24:57 +00002343 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002344 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002346
2347 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002348 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002349 return(NULL);
2350 }
2351
2352 /*
2353 * allocate a translation buffer.
2354 */
2355 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002356 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002357 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002358
2359 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002360 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002361 * we are operating on already parsed values.
2362 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002363 if (str < last)
2364 c = CUR_SCHAR(str, l);
2365 else
2366 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002367 while ((c != 0) && (c != end) && /* non input consuming loop */
2368 (c != end2) && (c != end3)) {
2369
2370 if (c == 0) break;
2371 if ((c == '&') && (str[1] == '#')) {
2372 int val = xmlParseStringCharRef(ctxt, &str);
2373 if (val != 0) {
2374 COPY_BUF(0,buffer,nbchars,val);
2375 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002376 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2377 growBuffer(buffer);
2378 }
Owen Taylor3473f882001-02-23 17:55:21 +00002379 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2380 if (xmlParserDebugEntities)
2381 xmlGenericError(xmlGenericErrorContext,
2382 "String decoding Entity Reference: %.30s\n",
2383 str);
2384 ent = xmlParseStringEntityRef(ctxt, &str);
2385 if ((ent != NULL) &&
2386 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2387 if (ent->content != NULL) {
2388 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002389 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2390 growBuffer(buffer);
2391 }
Owen Taylor3473f882001-02-23 17:55:21 +00002392 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002393 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2394 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002395 }
2396 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002397 ctxt->depth++;
2398 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2399 0, 0, 0);
2400 ctxt->depth--;
2401 if (rep != NULL) {
2402 current = rep;
2403 while (*current != 0) { /* non input consuming loop */
2404 buffer[nbchars++] = *current++;
2405 if (nbchars >
2406 buffer_size - XML_PARSER_BUFFER_SIZE) {
2407 growBuffer(buffer);
2408 }
2409 }
2410 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002411 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002412 }
2413 } else if (ent != NULL) {
2414 int i = xmlStrlen(ent->name);
2415 const xmlChar *cur = ent->name;
2416
2417 buffer[nbchars++] = '&';
2418 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2419 growBuffer(buffer);
2420 }
2421 for (;i > 0;i--)
2422 buffer[nbchars++] = *cur++;
2423 buffer[nbchars++] = ';';
2424 }
2425 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2426 if (xmlParserDebugEntities)
2427 xmlGenericError(xmlGenericErrorContext,
2428 "String decoding PE Reference: %.30s\n", str);
2429 ent = xmlParseStringPEReference(ctxt, &str);
2430 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002431 if (ent->content == NULL) {
2432 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2433 }
2434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 ctxt->depth++;
2436 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2437 0, 0, 0);
2438 ctxt->depth--;
2439 if (rep != NULL) {
2440 current = rep;
2441 while (*current != 0) { /* non input consuming loop */
2442 buffer[nbchars++] = *current++;
2443 if (nbchars >
2444 buffer_size - XML_PARSER_BUFFER_SIZE) {
2445 growBuffer(buffer);
2446 }
2447 }
2448 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002449 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002450 }
2451 }
2452 } else {
2453 COPY_BUF(l,buffer,nbchars,c);
2454 str += l;
2455 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2456 growBuffer(buffer);
2457 }
2458 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002459 if (str < last)
2460 c = CUR_SCHAR(str, l);
2461 else
2462 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002463 }
2464 buffer[nbchars++] = 0;
2465 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002466
2467mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002468 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002469 if (rep != NULL)
2470 xmlFree(rep);
2471 if (buffer != NULL)
2472 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002473 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002474}
2475
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476/**
2477 * xmlStringDecodeEntities:
2478 * @ctxt: the parser context
2479 * @str: the input string
2480 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2481 * @end: an end marker xmlChar, 0 if none
2482 * @end2: an end marker xmlChar, 0 if none
2483 * @end3: an end marker xmlChar, 0 if none
2484 *
2485 * Takes a entity string content and process to do the adequate substitutions.
2486 *
2487 * [67] Reference ::= EntityRef | CharRef
2488 *
2489 * [69] PEReference ::= '%' Name ';'
2490 *
2491 * Returns A newly allocated string with the substitution done. The caller
2492 * must deallocate it !
2493 */
2494xmlChar *
2495xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2496 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002497 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002498 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2499 end, end2, end3));
2500}
Owen Taylor3473f882001-02-23 17:55:21 +00002501
2502/************************************************************************
2503 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002504 * Commodity functions, cleanup needed ? *
2505 * *
2506 ************************************************************************/
2507
2508/**
2509 * areBlanks:
2510 * @ctxt: an XML parser context
2511 * @str: a xmlChar *
2512 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002513 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002514 *
2515 * Is this a sequence of blank chars that one can ignore ?
2516 *
2517 * Returns 1 if ignorable 0 otherwise.
2518 */
2519
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002520static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2521 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002522 int i, ret;
2523 xmlNodePtr lastChild;
2524
Daniel Veillard05c13a22001-09-09 08:38:09 +00002525 /*
2526 * Don't spend time trying to differentiate them, the same callback is
2527 * used !
2528 */
2529 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002530 return(0);
2531
Owen Taylor3473f882001-02-23 17:55:21 +00002532 /*
2533 * Check for xml:space value.
2534 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002535 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2536 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002537 return(0);
2538
2539 /*
2540 * Check that the string is made of blanks
2541 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002542 if (blank_chars == 0) {
2543 for (i = 0;i < len;i++)
2544 if (!(IS_BLANK_CH(str[i]))) return(0);
2545 }
Owen Taylor3473f882001-02-23 17:55:21 +00002546
2547 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002548 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002549 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002550 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 if (ctxt->myDoc != NULL) {
2552 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2553 if (ret == 0) return(1);
2554 if (ret == 1) return(0);
2555 }
2556
2557 /*
2558 * Otherwise, heuristic :-\
2559 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002560 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002561 if ((ctxt->node->children == NULL) &&
2562 (RAW == '<') && (NXT(1) == '/')) return(0);
2563
2564 lastChild = xmlGetLastChild(ctxt->node);
2565 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002566 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2567 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else if (xmlNodeIsText(lastChild))
2569 return(0);
2570 else if ((ctxt->node->children != NULL) &&
2571 (xmlNodeIsText(ctxt->node->children)))
2572 return(0);
2573 return(1);
2574}
2575
Owen Taylor3473f882001-02-23 17:55:21 +00002576/************************************************************************
2577 * *
2578 * Extra stuff for namespace support *
2579 * Relates to http://www.w3.org/TR/WD-xml-names *
2580 * *
2581 ************************************************************************/
2582
2583/**
2584 * xmlSplitQName:
2585 * @ctxt: an XML parser context
2586 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002588 *
2589 * parse an UTF8 encoded XML qualified name string
2590 *
2591 * [NS 5] QName ::= (Prefix ':')? LocalPart
2592 *
2593 * [NS 6] Prefix ::= NCName
2594 *
2595 * [NS 7] LocalPart ::= NCName
2596 *
2597 * Returns the local part, and prefix is updated
2598 * to get the Prefix if any.
2599 */
2600
2601xmlChar *
2602xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2603 xmlChar buf[XML_MAX_NAMELEN + 5];
2604 xmlChar *buffer = NULL;
2605 int len = 0;
2606 int max = XML_MAX_NAMELEN;
2607 xmlChar *ret = NULL;
2608 const xmlChar *cur = name;
2609 int c;
2610
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002611 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 *prefix = NULL;
2613
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002614 if (cur == NULL) return(NULL);
2615
Owen Taylor3473f882001-02-23 17:55:21 +00002616#ifndef XML_XML_NAMESPACE
2617 /* xml: prefix is not really a namespace */
2618 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2619 (cur[2] == 'l') && (cur[3] == ':'))
2620 return(xmlStrdup(name));
2621#endif
2622
Daniel Veillard597bc482003-07-24 16:08:28 +00002623 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002624 if (cur[0] == ':')
2625 return(xmlStrdup(name));
2626
2627 c = *cur++;
2628 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2629 buf[len++] = c;
2630 c = *cur++;
2631 }
2632 if (len >= max) {
2633 /*
2634 * Okay someone managed to make a huge name, so he's ready to pay
2635 * for the processing speed.
2636 */
2637 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002638
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002639 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002640 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002641 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002642 return(NULL);
2643 }
2644 memcpy(buffer, buf, len);
2645 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2646 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002647 xmlChar *tmp;
2648
Owen Taylor3473f882001-02-23 17:55:21 +00002649 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002650 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002651 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002652 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002653 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002654 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002655 return(NULL);
2656 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002657 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002658 }
2659 buffer[len++] = c;
2660 c = *cur++;
2661 }
2662 buffer[len] = 0;
2663 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002664
Daniel Veillard597bc482003-07-24 16:08:28 +00002665 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002666 if (buffer != NULL)
2667 xmlFree(buffer);
2668 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002669 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002670 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002671
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buffer == NULL)
2673 ret = xmlStrndup(buf, len);
2674 else {
2675 ret = buffer;
2676 buffer = NULL;
2677 max = XML_MAX_NAMELEN;
2678 }
2679
2680
2681 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002682 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002683 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002684 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002685 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002686 }
Owen Taylor3473f882001-02-23 17:55:21 +00002687 len = 0;
2688
Daniel Veillardbb284f42002-10-16 18:02:47 +00002689 /*
2690 * Check that the first character is proper to start
2691 * a new name
2692 */
2693 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2694 ((c >= 0x41) && (c <= 0x5A)) ||
2695 (c == '_') || (c == ':'))) {
2696 int l;
2697 int first = CUR_SCHAR(cur, l);
2698
2699 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002700 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002701 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002702 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002703 }
2704 }
2705 cur++;
2706
Owen Taylor3473f882001-02-23 17:55:21 +00002707 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2708 buf[len++] = c;
2709 c = *cur++;
2710 }
2711 if (len >= max) {
2712 /*
2713 * Okay someone managed to make a huge name, so he's ready to pay
2714 * for the processing speed.
2715 */
2716 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002717
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002718 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002719 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002720 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002721 return(NULL);
2722 }
2723 memcpy(buffer, buf, len);
2724 while (c != 0) { /* tested bigname2.xml */
2725 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002726 xmlChar *tmp;
2727
Owen Taylor3473f882001-02-23 17:55:21 +00002728 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002729 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002730 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002731 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002732 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002733 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002734 return(NULL);
2735 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002736 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002737 }
2738 buffer[len++] = c;
2739 c = *cur++;
2740 }
2741 buffer[len] = 0;
2742 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002743
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if (buffer == NULL)
2745 ret = xmlStrndup(buf, len);
2746 else {
2747 ret = buffer;
2748 }
2749 }
2750
2751 return(ret);
2752}
2753
2754/************************************************************************
2755 * *
2756 * The parser itself *
2757 * Relates to http://www.w3.org/TR/REC-xml *
2758 * *
2759 ************************************************************************/
2760
Daniel Veillard34e3f642008-07-29 09:02:27 +00002761/************************************************************************
2762 * *
2763 * Routines to parse Name, NCName and NmToken *
2764 * *
2765 ************************************************************************/
/*
 * Invocation counters. The parsing routines visible in this part of the
 * file increment the counter carrying their own name on entry (for
 * instance xmlParseName() bumps nbParseName); nothing in this part of the
 * file reads them back.
 * NOTE(review): these have external linkage and are updated without any
 * synchronisation - confirm that this is intended before relying on them
 * from threaded code.
 */
unsigned long nbParseName = 0;
unsigned long nbParseNmToken = 0;
unsigned long nbParseNCName = 0;
unsigned long nbParseNCNameComplex = 0;
unsigned long nbParseNameComplex = 0;
unsigned long nbParseStringName = 0;
2772/*
2773 * The two following functions are related to the change of accepted
2774 * characters for Name and NmToken in the Revision 5 of XML-1.0
2775 * They correspond to the modified production [4] and the new production [4a]
2776 * changes in that revision. Also note that the macros used for the
2777 * productions Letter, Digit, CombiningChar and Extender are not needed
2778 * anymore.
2779 * We still keep compatibility to pre-revision5 parsing semantic if the
2780 * new XML_PARSE_OLD10 option is given to the parser.
2781 */
2782static int
2783xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2784 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2785 /*
2786 * Use the new checks of production [4] [4a] amd [5] of the
2787 * Update 5 of XML-1.0
2788 */
2789 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2790 (((c >= 'a') && (c <= 'z')) ||
2791 ((c >= 'A') && (c <= 'Z')) ||
2792 (c == '_') || (c == ':') ||
2793 ((c >= 0xC0) && (c <= 0xD6)) ||
2794 ((c >= 0xD8) && (c <= 0xF6)) ||
2795 ((c >= 0xF8) && (c <= 0x2FF)) ||
2796 ((c >= 0x370) && (c <= 0x37D)) ||
2797 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2798 ((c >= 0x200C) && (c <= 0x200D)) ||
2799 ((c >= 0x2070) && (c <= 0x218F)) ||
2800 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2801 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2802 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2803 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2804 ((c >= 0x10000) && (c <= 0xEFFFF))))
2805 return(1);
2806 } else {
2807 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2808 return(1);
2809 }
2810 return(0);
2811}
2812
2813static int
2814xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2815 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2816 /*
2817 * Use the new checks of production [4] [4a] amd [5] of the
2818 * Update 5 of XML-1.0
2819 */
2820 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2821 (((c >= 'a') && (c <= 'z')) ||
2822 ((c >= 'A') && (c <= 'Z')) ||
2823 ((c >= '0') && (c <= '9')) || /* !start */
2824 (c == '_') || (c == ':') ||
2825 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2826 ((c >= 0xC0) && (c <= 0xD6)) ||
2827 ((c >= 0xD8) && (c <= 0xF6)) ||
2828 ((c >= 0xF8) && (c <= 0x2FF)) ||
2829 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2830 ((c >= 0x370) && (c <= 0x37D)) ||
2831 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2832 ((c >= 0x200C) && (c <= 0x200D)) ||
2833 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2834 ((c >= 0x2070) && (c <= 0x218F)) ||
2835 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2836 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2837 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2838 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2839 ((c >= 0x10000) && (c <= 0xEFFFF))))
2840 return(1);
2841 } else {
2842 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2843 (c == '.') || (c == '-') ||
2844 (c == '_') || (c == ':') ||
2845 (IS_COMBINING(c)) ||
2846 (IS_EXTENDER(c)))
2847 return(1);
2848 }
2849 return(0);
2850}
2851
Daniel Veillarde57ec792003-09-10 10:50:59 +00002852static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002853 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002854
Daniel Veillard34e3f642008-07-29 09:02:27 +00002855static const xmlChar *
2856xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2857 int len = 0, l;
2858 int c;
2859 int count = 0;
2860
2861 nbParseNameComplex++;
2862
2863 /*
2864 * Handler for more complex cases
2865 */
2866 GROW;
2867 c = CUR_CHAR(l);
2868 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2869 /*
2870 * Use the new checks of production [4] [4a] amd [5] of the
2871 * Update 5 of XML-1.0
2872 */
2873 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2874 (!(((c >= 'a') && (c <= 'z')) ||
2875 ((c >= 'A') && (c <= 'Z')) ||
2876 (c == '_') || (c == ':') ||
2877 ((c >= 0xC0) && (c <= 0xD6)) ||
2878 ((c >= 0xD8) && (c <= 0xF6)) ||
2879 ((c >= 0xF8) && (c <= 0x2FF)) ||
2880 ((c >= 0x370) && (c <= 0x37D)) ||
2881 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2882 ((c >= 0x200C) && (c <= 0x200D)) ||
2883 ((c >= 0x2070) && (c <= 0x218F)) ||
2884 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2885 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2886 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2887 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2888 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
2889 return(NULL);
2890 }
2891 len += l;
2892 NEXTL(l);
2893 c = CUR_CHAR(l);
2894 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2895 (((c >= 'a') && (c <= 'z')) ||
2896 ((c >= 'A') && (c <= 'Z')) ||
2897 ((c >= '0') && (c <= '9')) || /* !start */
2898 (c == '_') || (c == ':') ||
2899 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2900 ((c >= 0xC0) && (c <= 0xD6)) ||
2901 ((c >= 0xD8) && (c <= 0xF6)) ||
2902 ((c >= 0xF8) && (c <= 0x2FF)) ||
2903 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2904 ((c >= 0x370) && (c <= 0x37D)) ||
2905 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2906 ((c >= 0x200C) && (c <= 0x200D)) ||
2907 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2908 ((c >= 0x2070) && (c <= 0x218F)) ||
2909 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2910 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2911 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2912 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2913 ((c >= 0x10000) && (c <= 0xEFFFF))
2914 )) {
2915 if (count++ > 100) {
2916 count = 0;
2917 GROW;
2918 }
2919 len += l;
2920 NEXTL(l);
2921 c = CUR_CHAR(l);
2922 }
2923 } else {
2924 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2925 (!IS_LETTER(c) && (c != '_') &&
2926 (c != ':'))) {
2927 return(NULL);
2928 }
2929 len += l;
2930 NEXTL(l);
2931 c = CUR_CHAR(l);
2932
2933 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2934 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2935 (c == '.') || (c == '-') ||
2936 (c == '_') || (c == ':') ||
2937 (IS_COMBINING(c)) ||
2938 (IS_EXTENDER(c)))) {
2939 if (count++ > 100) {
2940 count = 0;
2941 GROW;
2942 }
2943 len += l;
2944 NEXTL(l);
2945 c = CUR_CHAR(l);
2946 }
2947 }
2948 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2949 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2950 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2951}
2952
Owen Taylor3473f882001-02-23 17:55:21 +00002953/**
2954 * xmlParseName:
2955 * @ctxt: an XML parser context
2956 *
2957 * parse an XML name.
2958 *
2959 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2960 * CombiningChar | Extender
2961 *
2962 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2963 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002964 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002965 *
2966 * Returns the Name parsed or NULL
2967 */
2968
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002969const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002970xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002971 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002972 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002973 int count = 0;
2974
2975 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002976
Daniel Veillard34e3f642008-07-29 09:02:27 +00002977 nbParseName++;
2978
Daniel Veillard48b2f892001-02-25 16:11:03 +00002979 /*
2980 * Accelerator for simple ASCII names
2981 */
2982 in = ctxt->input->cur;
2983 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2984 ((*in >= 0x41) && (*in <= 0x5A)) ||
2985 (*in == '_') || (*in == ':')) {
2986 in++;
2987 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2988 ((*in >= 0x41) && (*in <= 0x5A)) ||
2989 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002990 (*in == '_') || (*in == '-') ||
2991 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002992 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002993 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002994 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002995 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002996 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002997 ctxt->nbChars += count;
2998 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002999 if (ret == NULL)
3000 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003001 return(ret);
3002 }
3003 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003004 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003005 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003006}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003007
Daniel Veillard34e3f642008-07-29 09:02:27 +00003008static const xmlChar *
3009xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3010 int len = 0, l;
3011 int c;
3012 int count = 0;
3013
3014 nbParseNCNameComplex++;
3015
3016 /*
3017 * Handler for more complex cases
3018 */
3019 GROW;
3020 c = CUR_CHAR(l);
3021 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3022 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3023 return(NULL);
3024 }
3025
3026 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3027 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3028 if (count++ > 100) {
3029 count = 0;
3030 GROW;
3031 }
3032 len += l;
3033 NEXTL(l);
3034 c = CUR_CHAR(l);
3035 }
3036 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3037}
3038
3039/**
3040 * xmlParseNCName:
3041 * @ctxt: an XML parser context
3042 * @len: lenght of the string parsed
3043 *
3044 * parse an XML name.
3045 *
3046 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3047 * CombiningChar | Extender
3048 *
3049 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3050 *
3051 * Returns the Name parsed or NULL
3052 */
3053
3054static const xmlChar *
3055xmlParseNCName(xmlParserCtxtPtr ctxt) {
3056 const xmlChar *in;
3057 const xmlChar *ret;
3058 int count = 0;
3059
3060 nbParseNCName++;
3061
3062 /*
3063 * Accelerator for simple ASCII names
3064 */
3065 in = ctxt->input->cur;
3066 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3067 ((*in >= 0x41) && (*in <= 0x5A)) ||
3068 (*in == '_')) {
3069 in++;
3070 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3071 ((*in >= 0x41) && (*in <= 0x5A)) ||
3072 ((*in >= 0x30) && (*in <= 0x39)) ||
3073 (*in == '_') || (*in == '-') ||
3074 (*in == '.'))
3075 in++;
3076 if ((*in > 0) && (*in < 0x80)) {
3077 count = in - ctxt->input->cur;
3078 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3079 ctxt->input->cur = in;
3080 ctxt->nbChars += count;
3081 ctxt->input->col += count;
3082 if (ret == NULL) {
3083 xmlErrMemory(ctxt, NULL);
3084 }
3085 return(ret);
3086 }
3087 }
3088 return(xmlParseNCNameComplex(ctxt));
3089}
3090
Daniel Veillard46de64e2002-05-29 08:21:33 +00003091/**
3092 * xmlParseNameAndCompare:
3093 * @ctxt: an XML parser context
3094 *
3095 * parse an XML name and compares for match
3096 * (specialized for endtag parsing)
3097 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003098 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3099 * and the name for mismatch
3100 */
3101
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003102static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003103xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003104 register const xmlChar *cmp = other;
3105 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003106 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003107
3108 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003109
Daniel Veillard46de64e2002-05-29 08:21:33 +00003110 in = ctxt->input->cur;
3111 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003112 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003113 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003114 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003115 }
William M. Brack76e95df2003-10-18 16:20:14 +00003116 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003117 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003118 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003119 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003120 }
3121 /* failure (or end of input buffer), check with full function */
3122 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003123 /* strings coming from the dictionnary direct compare possible */
3124 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003125 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003126 }
3127 return ret;
3128}
3129
Owen Taylor3473f882001-02-23 17:55:21 +00003130/**
3131 * xmlParseStringName:
3132 * @ctxt: an XML parser context
3133 * @str: a pointer to the string pointer (IN/OUT)
3134 *
3135 * parse an XML name.
3136 *
3137 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3138 * CombiningChar | Extender
3139 *
3140 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3141 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003142 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003143 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003144 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003145 * is updated to the current location in the string.
3146 */
3147
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003148static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003149xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3150 xmlChar buf[XML_MAX_NAMELEN + 5];
3151 const xmlChar *cur = *str;
3152 int len = 0, l;
3153 int c;
3154
Daniel Veillard34e3f642008-07-29 09:02:27 +00003155 nbParseStringName++;
3156
Owen Taylor3473f882001-02-23 17:55:21 +00003157 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003158 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003159 return(NULL);
3160 }
3161
Daniel Veillard34e3f642008-07-29 09:02:27 +00003162 COPY_BUF(l,buf,len,c);
3163 cur += l;
3164 c = CUR_SCHAR(cur, l);
3165 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003166 COPY_BUF(l,buf,len,c);
3167 cur += l;
3168 c = CUR_SCHAR(cur, l);
3169 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3170 /*
3171 * Okay someone managed to make a huge name, so he's ready to pay
3172 * for the processing speed.
3173 */
3174 xmlChar *buffer;
3175 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003176
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003177 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003178 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003179 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003180 return(NULL);
3181 }
3182 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003183 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003184 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003185 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003186 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003187 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003188 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003189 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003190 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003191 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003192 return(NULL);
3193 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003194 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003195 }
3196 COPY_BUF(l,buffer,len,c);
3197 cur += l;
3198 c = CUR_SCHAR(cur, l);
3199 }
3200 buffer[len] = 0;
3201 *str = cur;
3202 return(buffer);
3203 }
3204 }
3205 *str = cur;
3206 return(xmlStrndup(buf, len));
3207}
3208
3209/**
3210 * xmlParseNmtoken:
3211 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212 *
Owen Taylor3473f882001-02-23 17:55:21 +00003213 * parse an XML Nmtoken.
3214 *
3215 * [7] Nmtoken ::= (NameChar)+
3216 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003217 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003218 *
3219 * Returns the Nmtoken parsed or NULL
3220 */
3221
3222xmlChar *
3223xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3224 xmlChar buf[XML_MAX_NAMELEN + 5];
3225 int len = 0, l;
3226 int c;
3227 int count = 0;
3228
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229 nbParseNmToken++;
3230
Owen Taylor3473f882001-02-23 17:55:21 +00003231 GROW;
3232 c = CUR_CHAR(l);
3233
Daniel Veillard34e3f642008-07-29 09:02:27 +00003234 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003235 if (count++ > 100) {
3236 count = 0;
3237 GROW;
3238 }
3239 COPY_BUF(l,buf,len,c);
3240 NEXTL(l);
3241 c = CUR_CHAR(l);
3242 if (len >= XML_MAX_NAMELEN) {
3243 /*
3244 * Okay someone managed to make a huge token, so he's ready to pay
3245 * for the processing speed.
3246 */
3247 xmlChar *buffer;
3248 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003249
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003250 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003251 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003252 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003253 return(NULL);
3254 }
3255 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003256 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003257 if (count++ > 100) {
3258 count = 0;
3259 GROW;
3260 }
3261 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003262 xmlChar *tmp;
3263
Owen Taylor3473f882001-02-23 17:55:21 +00003264 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003265 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003266 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003267 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003268 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003269 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003270 return(NULL);
3271 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003272 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003273 }
3274 COPY_BUF(l,buffer,len,c);
3275 NEXTL(l);
3276 c = CUR_CHAR(l);
3277 }
3278 buffer[len] = 0;
3279 return(buffer);
3280 }
3281 }
3282 if (len == 0)
3283 return(NULL);
3284 return(xmlStrndup(buf, len));
3285}
3286
3287/**
3288 * xmlParseEntityValue:
3289 * @ctxt: an XML parser context
3290 * @orig: if non-NULL store a copy of the original entity value
3291 *
3292 * parse a value for ENTITY declarations
3293 *
3294 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3295 * "'" ([^%&'] | PEReference | Reference)* "'"
3296 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003297 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003298 */
3299
3300xmlChar *
3301xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3302 xmlChar *buf = NULL;
3303 int len = 0;
3304 int size = XML_PARSER_BUFFER_SIZE;
3305 int c, l;
3306 xmlChar stop;
3307 xmlChar *ret = NULL;
3308 const xmlChar *cur = NULL;
3309 xmlParserInputPtr input;
3310
3311 if (RAW == '"') stop = '"';
3312 else if (RAW == '\'') stop = '\'';
3313 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003314 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003315 return(NULL);
3316 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003317 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003318 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003319 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003320 return(NULL);
3321 }
3322
3323 /*
3324 * The content of the entity definition is copied in a buffer.
3325 */
3326
3327 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3328 input = ctxt->input;
3329 GROW;
3330 NEXT;
3331 c = CUR_CHAR(l);
3332 /*
3333 * NOTE: 4.4.5 Included in Literal
3334 * When a parameter entity reference appears in a literal entity
3335 * value, ... a single or double quote character in the replacement
3336 * text is always treated as a normal data character and will not
3337 * terminate the literal.
3338 * In practice it means we stop the loop only when back at parsing
3339 * the initial entity and the quote is found
3340 */
William M. Brack871611b2003-10-18 04:53:14 +00003341 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003342 (ctxt->input != input))) {
3343 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003344 xmlChar *tmp;
3345
Owen Taylor3473f882001-02-23 17:55:21 +00003346 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3348 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003349 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003351 return(NULL);
3352 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003354 }
3355 COPY_BUF(l,buf,len,c);
3356 NEXTL(l);
3357 /*
3358 * Pop-up of finished entities.
3359 */
3360 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3361 xmlPopInput(ctxt);
3362
3363 GROW;
3364 c = CUR_CHAR(l);
3365 if (c == 0) {
3366 GROW;
3367 c = CUR_CHAR(l);
3368 }
3369 }
3370 buf[len] = 0;
3371
3372 /*
3373 * Raise problem w.r.t. '&' and '%' being used in non-entities
3374 * reference constructs. Note Charref will be handled in
3375 * xmlStringDecodeEntities()
3376 */
3377 cur = buf;
3378 while (*cur != 0) { /* non input consuming */
3379 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3380 xmlChar *name;
3381 xmlChar tmp = *cur;
3382
3383 cur++;
3384 name = xmlParseStringName(ctxt, &cur);
3385 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003386 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003387 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003388 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003390 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3391 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003392 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003393 }
3394 if (name != NULL)
3395 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003396 if (*cur == 0)
3397 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003398 }
3399 cur++;
3400 }
3401
3402 /*
3403 * Then PEReference entities are substituted.
3404 */
3405 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003406 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003407 xmlFree(buf);
3408 } else {
3409 NEXT;
3410 /*
3411 * NOTE: 4.4.7 Bypassed
3412 * When a general entity reference appears in the EntityValue in
3413 * an entity declaration, it is bypassed and left as is.
3414 * so XML_SUBSTITUTE_REF is not set here.
3415 */
3416 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3417 0, 0, 0);
3418 if (orig != NULL)
3419 *orig = buf;
3420 else
3421 xmlFree(buf);
3422 }
3423
3424 return(ret);
3425}
3426
3427/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003428 * xmlParseAttValueComplex:
3429 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003430 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003431 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003432 *
3433 * parse a value for an attribute, this is the fallback function
3434 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003435 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003436 *
3437 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3438 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003439static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003440xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003441 xmlChar limit = 0;
3442 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003443 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 int len = 0;
3445 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003446 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003447 xmlChar *current = NULL;
3448 xmlEntityPtr ent;
3449
Owen Taylor3473f882001-02-23 17:55:21 +00003450 if (NXT(0) == '"') {
3451 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3452 limit = '"';
3453 NEXT;
3454 } else if (NXT(0) == '\'') {
3455 limit = '\'';
3456 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3457 NEXT;
3458 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003459 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003460 return(NULL);
3461 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003462
Owen Taylor3473f882001-02-23 17:55:21 +00003463 /*
3464 * allocate a translation buffer.
3465 */
3466 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003467 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003468 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003469
3470 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003471 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003472 */
3473 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003474 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003475 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003476 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003477 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003478 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003479 if (NXT(1) == '#') {
3480 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003481
Owen Taylor3473f882001-02-23 17:55:21 +00003482 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003483 if (ctxt->replaceEntities) {
3484 if (len > buf_size - 10) {
3485 growBuffer(buf);
3486 }
3487 buf[len++] = '&';
3488 } else {
3489 /*
3490 * The reparsing will be done in xmlStringGetNodeList()
3491 * called by the attribute() function in SAX.c
3492 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003493 if (len > buf_size - 10) {
3494 growBuffer(buf);
3495 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003496 buf[len++] = '&';
3497 buf[len++] = '#';
3498 buf[len++] = '3';
3499 buf[len++] = '8';
3500 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003501 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003502 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003503 if (len > buf_size - 10) {
3504 growBuffer(buf);
3505 }
Owen Taylor3473f882001-02-23 17:55:21 +00003506 len += xmlCopyChar(0, &buf[len], val);
3507 }
3508 } else {
3509 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003510 if ((ent != NULL) &&
3511 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3512 if (len > buf_size - 10) {
3513 growBuffer(buf);
3514 }
3515 if ((ctxt->replaceEntities == 0) &&
3516 (ent->content[0] == '&')) {
3517 buf[len++] = '&';
3518 buf[len++] = '#';
3519 buf[len++] = '3';
3520 buf[len++] = '8';
3521 buf[len++] = ';';
3522 } else {
3523 buf[len++] = ent->content[0];
3524 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003525 } else if ((ent != NULL) &&
3526 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003527 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3528 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003529 XML_SUBSTITUTE_REF,
3530 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003531 if (rep != NULL) {
3532 current = rep;
3533 while (*current != 0) { /* non input consuming */
3534 buf[len++] = *current++;
3535 if (len > buf_size - 10) {
3536 growBuffer(buf);
3537 }
3538 }
3539 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003540 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003541 }
3542 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003543 if (len > buf_size - 10) {
3544 growBuffer(buf);
3545 }
Owen Taylor3473f882001-02-23 17:55:21 +00003546 if (ent->content != NULL)
3547 buf[len++] = ent->content[0];
3548 }
3549 } else if (ent != NULL) {
3550 int i = xmlStrlen(ent->name);
3551 const xmlChar *cur = ent->name;
3552
3553 /*
3554 * This may look absurd but is needed to detect
3555 * entities problems
3556 */
3557 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3558 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003559 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003560 XML_SUBSTITUTE_REF, 0, 0, 0);
3561 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003562 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003563 rep = NULL;
3564 }
Owen Taylor3473f882001-02-23 17:55:21 +00003565 }
3566
3567 /*
3568 * Just output the reference
3569 */
3570 buf[len++] = '&';
3571 if (len > buf_size - i - 10) {
3572 growBuffer(buf);
3573 }
3574 for (;i > 0;i--)
3575 buf[len++] = *cur++;
3576 buf[len++] = ';';
3577 }
3578 }
3579 } else {
3580 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003581 if ((len != 0) || (!normalize)) {
3582 if ((!normalize) || (!in_space)) {
3583 COPY_BUF(l,buf,len,0x20);
3584 if (len > buf_size - 10) {
3585 growBuffer(buf);
3586 }
3587 }
3588 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 }
3590 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003591 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003592 COPY_BUF(l,buf,len,c);
3593 if (len > buf_size - 10) {
3594 growBuffer(buf);
3595 }
3596 }
3597 NEXTL(l);
3598 }
3599 GROW;
3600 c = CUR_CHAR(l);
3601 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003602 if ((in_space) && (normalize)) {
3603 while (buf[len - 1] == 0x20) len--;
3604 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003605 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003606 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003607 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003608 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003609 if ((c != 0) && (!IS_CHAR(c))) {
3610 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3611 "invalid character in attribute value\n");
3612 } else {
3613 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3614 "AttValue: ' expected\n");
3615 }
Owen Taylor3473f882001-02-23 17:55:21 +00003616 } else
3617 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003618 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003619 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003620
3621mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003622 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003623 if (buf != NULL)
3624 xmlFree(buf);
3625 if (rep != NULL)
3626 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003627 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003628}
3629
3630/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003631 * xmlParseAttValue:
3632 * @ctxt: an XML parser context
3633 *
3634 * parse a value for an attribute
3635 * Note: the parser won't do substitution of entities here, this
3636 * will be handled later in xmlStringGetNodeList
3637 *
3638 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3639 * "'" ([^<&'] | Reference)* "'"
3640 *
3641 * 3.3.3 Attribute-Value Normalization:
3642 * Before the value of an attribute is passed to the application or
3643 * checked for validity, the XML processor must normalize it as follows:
3644 * - a character reference is processed by appending the referenced
3645 * character to the attribute value
3646 * - an entity reference is processed by recursively processing the
3647 * replacement text of the entity
3648 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3649 * appending #x20 to the normalized value, except that only a single
3650 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3651 * parsed entity or the literal entity value of an internal parsed entity
3652 * - other characters are processed by appending them to the normalized value
3653 * If the declared value is not CDATA, then the XML processor must further
3654 * process the normalized attribute value by discarding any leading and
3655 * trailing space (#x20) characters, and by replacing sequences of space
3656 * (#x20) characters by a single space (#x20) character.
3657 * All attributes for which no declaration has been read should be treated
3658 * by a non-validating parser as if declared CDATA.
3659 *
3660 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3661 */
3662
3663
3664xmlChar *
3665xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003666 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003668}
3669
3670/**
Owen Taylor3473f882001-02-23 17:55:21 +00003671 * xmlParseSystemLiteral:
3672 * @ctxt: an XML parser context
3673 *
3674 * parse an XML Literal
3675 *
3676 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3677 *
3678 * Returns the SystemLiteral parsed or NULL
3679 */
3680
3681xmlChar *
3682xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3683 xmlChar *buf = NULL;
3684 int len = 0;
3685 int size = XML_PARSER_BUFFER_SIZE;
3686 int cur, l;
3687 xmlChar stop;
3688 int state = ctxt->instate;
3689 int count = 0;
3690
3691 SHRINK;
3692 if (RAW == '"') {
3693 NEXT;
3694 stop = '"';
3695 } else if (RAW == '\'') {
3696 NEXT;
3697 stop = '\'';
3698 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003699 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003700 return(NULL);
3701 }
3702
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003703 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003704 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003705 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 return(NULL);
3707 }
3708 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3709 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003710 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003711 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003712 xmlChar *tmp;
3713
Owen Taylor3473f882001-02-23 17:55:21 +00003714 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003715 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3716 if (tmp == NULL) {
3717 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003718 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003719 ctxt->instate = (xmlParserInputState) state;
3720 return(NULL);
3721 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003722 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003723 }
3724 count++;
3725 if (count > 50) {
3726 GROW;
3727 count = 0;
3728 }
3729 COPY_BUF(l,buf,len,cur);
3730 NEXTL(l);
3731 cur = CUR_CHAR(l);
3732 if (cur == 0) {
3733 GROW;
3734 SHRINK;
3735 cur = CUR_CHAR(l);
3736 }
3737 }
3738 buf[len] = 0;
3739 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003740 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003741 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003742 } else {
3743 NEXT;
3744 }
3745 return(buf);
3746}
3747
3748/**
3749 * xmlParsePubidLiteral:
3750 * @ctxt: an XML parser context
3751 *
3752 * parse an XML public literal
3753 *
3754 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3755 *
3756 * Returns the PubidLiteral parsed or NULL.
3757 */
3758
3759xmlChar *
3760xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3761 xmlChar *buf = NULL;
3762 int len = 0;
3763 int size = XML_PARSER_BUFFER_SIZE;
3764 xmlChar cur;
3765 xmlChar stop;
3766 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003767 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003768
3769 SHRINK;
3770 if (RAW == '"') {
3771 NEXT;
3772 stop = '"';
3773 } else if (RAW == '\'') {
3774 NEXT;
3775 stop = '\'';
3776 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003777 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 return(NULL);
3779 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003780 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003781 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 return(NULL);
3784 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003785 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003786 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003787 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003788 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003789 xmlChar *tmp;
3790
Owen Taylor3473f882001-02-23 17:55:21 +00003791 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003792 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3793 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003794 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003795 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003796 return(NULL);
3797 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003798 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 }
3800 buf[len++] = cur;
3801 count++;
3802 if (count > 50) {
3803 GROW;
3804 count = 0;
3805 }
3806 NEXT;
3807 cur = CUR;
3808 if (cur == 0) {
3809 GROW;
3810 SHRINK;
3811 cur = CUR;
3812 }
3813 }
3814 buf[len] = 0;
3815 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003816 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003817 } else {
3818 NEXT;
3819 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003820 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003821 return(buf);
3822}
3823
/*
 * Forward declaration; the definition is not in this part of the file.
 * NOTE(review): presumably the general-case companion of
 * xmlParseCharData() -- confirm against its definition.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003825
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning, indexed by input byte. An entry holds the byte itself when
 * the fast scan may step over it and 0x00 when the scan has to stop:
 * control characters other than 0x9 (including CR and LF), '&', '<',
 * ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 (tab) is accepted, CR/LF handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, always stops the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3863
Owen Taylor3473f882001-02-23 17:55:21 +00003864/**
3865 * xmlParseCharData:
3866 * @ctxt: an XML parser context
3867 * @cdata: int indicating whether we are within a CDATA section
3868 *
3869 * parse a CharData section.
3870 * if we are within a CDATA section ']]>' marks an end of section.
3871 *
3872 * The right angle bracket (>) may be represented using the string "&gt;",
3873 * and must, for compatibility, be escaped using "&gt;" or a character
3874 * reference when it appears in the string "]]>" in content, when that
3875 * string is not marking the end of a CDATA section.
3876 *
3877 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3878 */
3879
3880void
3881xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003882 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003883 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003884 int line = ctxt->input->line;
3885 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003886 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003887
3888 SHRINK;
3889 GROW;
3890 /*
3891 * Accelerated common case where input don't need to be
3892 * modified before passing it to the handler.
3893 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003894 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003895 in = ctxt->input->cur;
3896 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003897get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003898 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003899 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003900 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003901 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003902 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003903 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003904 goto get_more_space;
3905 }
3906 if (*in == '<') {
3907 nbchar = in - ctxt->input->cur;
3908 if (nbchar > 0) {
3909 const xmlChar *tmp = ctxt->input->cur;
3910 ctxt->input->cur = in;
3911
Daniel Veillard34099b42004-11-04 17:34:35 +00003912 if ((ctxt->sax != NULL) &&
3913 (ctxt->sax->ignorableWhitespace !=
3914 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003915 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003916 if (ctxt->sax->ignorableWhitespace != NULL)
3917 ctxt->sax->ignorableWhitespace(ctxt->userData,
3918 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003919 } else {
3920 if (ctxt->sax->characters != NULL)
3921 ctxt->sax->characters(ctxt->userData,
3922 tmp, nbchar);
3923 if (*ctxt->space == -1)
3924 *ctxt->space = -2;
3925 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003926 } else if ((ctxt->sax != NULL) &&
3927 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003928 ctxt->sax->characters(ctxt->userData,
3929 tmp, nbchar);
3930 }
3931 }
3932 return;
3933 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003934
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003935get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003936 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003937 while (test_char_data[*in]) {
3938 in++;
3939 ccol++;
3940 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003941 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003942 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003943 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003944 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003945 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003946 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003947 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003948 }
3949 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003950 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003952 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003953 return;
3954 }
3955 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003956 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003957 goto get_more;
3958 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003959 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003960 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003961 if ((ctxt->sax != NULL) &&
3962 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003963 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003964 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003965 const xmlChar *tmp = ctxt->input->cur;
3966 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003967
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003968 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003969 if (ctxt->sax->ignorableWhitespace != NULL)
3970 ctxt->sax->ignorableWhitespace(ctxt->userData,
3971 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003972 } else {
3973 if (ctxt->sax->characters != NULL)
3974 ctxt->sax->characters(ctxt->userData,
3975 tmp, nbchar);
3976 if (*ctxt->space == -1)
3977 *ctxt->space = -2;
3978 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003979 line = ctxt->input->line;
3980 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003981 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003982 if (ctxt->sax->characters != NULL)
3983 ctxt->sax->characters(ctxt->userData,
3984 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003985 line = ctxt->input->line;
3986 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003987 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003988 }
3989 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003990 if (*in == 0xD) {
3991 in++;
3992 if (*in == 0xA) {
3993 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003994 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003995 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003996 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003997 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003998 in--;
3999 }
4000 if (*in == '<') {
4001 return;
4002 }
4003 if (*in == '&') {
4004 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004005 }
4006 SHRINK;
4007 GROW;
4008 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004009 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004010 nbchar = 0;
4011 }
Daniel Veillard50582112001-03-26 22:52:16 +00004012 ctxt->input->line = line;
4013 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004014 xmlParseCharDataComplex(ctxt, cdata);
4015}
4016
Daniel Veillard01c13b52002-12-10 15:19:08 +00004017/**
4018 * xmlParseCharDataComplex:
4019 * @ctxt: an XML parser context
4020 * @cdata: int indicating whether we are within a CDATA section
4021 *
4022 * parse a CharData section.this is the fallback function
4023 * of xmlParseCharData() when the parsing requires handling
4024 * of non-ASCII characters.
4025 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004026void
4027xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004028 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4029 int nbchar = 0;
4030 int cur, l;
4031 int count = 0;
4032
4033 SHRINK;
4034 GROW;
4035 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004036 while ((cur != '<') && /* checked */
4037 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004038 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004039 if ((cur == ']') && (NXT(1) == ']') &&
4040 (NXT(2) == '>')) {
4041 if (cdata) break;
4042 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004043 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045 }
4046 COPY_BUF(l,buf,nbchar,cur);
4047 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004048 buf[nbchar] = 0;
4049
Owen Taylor3473f882001-02-23 17:55:21 +00004050 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004051 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004052 */
4053 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004054 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004055 if (ctxt->sax->ignorableWhitespace != NULL)
4056 ctxt->sax->ignorableWhitespace(ctxt->userData,
4057 buf, nbchar);
4058 } else {
4059 if (ctxt->sax->characters != NULL)
4060 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004061 if ((ctxt->sax->characters !=
4062 ctxt->sax->ignorableWhitespace) &&
4063 (*ctxt->space == -1))
4064 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 }
4066 }
4067 nbchar = 0;
4068 }
4069 count++;
4070 if (count > 50) {
4071 GROW;
4072 count = 0;
4073 }
4074 NEXTL(l);
4075 cur = CUR_CHAR(l);
4076 }
4077 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004078 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004079 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004080 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004081 */
4082 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004083 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004084 if (ctxt->sax->ignorableWhitespace != NULL)
4085 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4086 } else {
4087 if (ctxt->sax->characters != NULL)
4088 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004089 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4090 (*ctxt->space == -1))
4091 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004092 }
4093 }
4094 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004095 if ((cur != 0) && (!IS_CHAR(cur))) {
4096 /* Generate the error and skip the offending character */
4097 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4098 "PCDATA invalid Char value %d\n",
4099 cur);
4100 NEXTL(l);
4101 }
Owen Taylor3473f882001-02-23 17:55:21 +00004102}
4103
4104/**
4105 * xmlParseExternalID:
4106 * @ctxt: an XML parser context
4107 * @publicID: a xmlChar** receiving PubidLiteral
4108 * @strict: indicate whether we should restrict parsing to only
4109 * production [75], see NOTE below
4110 *
4111 * Parse an External ID or a Public ID
4112 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004113 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004114 * 'PUBLIC' S PubidLiteral S SystemLiteral
4115 *
4116 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4117 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4118 *
4119 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4120 *
4121 * Returns the function returns SystemLiteral and in the second
4122 * case publicID receives PubidLiteral, is strict is off
4123 * it is possible to return NULL and have publicID set.
4124 */
4125
4126xmlChar *
4127xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4128 xmlChar *URI = NULL;
4129
4130 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004131
4132 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004133 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004134 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004135 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4137 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004138 }
4139 SKIP_BLANKS;
4140 URI = xmlParseSystemLiteral(ctxt);
4141 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004142 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004143 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004144 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004145 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004146 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004147 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004148 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004149 }
4150 SKIP_BLANKS;
4151 *publicID = xmlParsePubidLiteral(ctxt);
4152 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004153 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004154 }
4155 if (strict) {
4156 /*
4157 * We don't handle [83] so "S SystemLiteral" is required.
4158 */
William M. Brack76e95df2003-10-18 16:20:14 +00004159 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004160 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004161 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004162 }
4163 } else {
4164 /*
4165 * We handle [83] so we return immediately, if
4166 * "S SystemLiteral" is not detected. From a purely parsing
4167 * point of view that's a nice mess.
4168 */
4169 const xmlChar *ptr;
4170 GROW;
4171
4172 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004173 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004174
William M. Brack76e95df2003-10-18 16:20:14 +00004175 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004176 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4177 }
4178 SKIP_BLANKS;
4179 URI = xmlParseSystemLiteral(ctxt);
4180 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004181 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004182 }
4183 }
4184 return(URI);
4185}
4186
4187/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004188 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004189 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004190 * @buf: the already parsed part of the buffer
4191 * @len: number of bytes filles in the buffer
4192 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004193 *
4194 * Skip an XML (SGML) comment <!-- .... -->
4195 * The spec says that "For compatibility, the string "--" (double-hyphen)
4196 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004197 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004198 *
4199 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4200 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004201static void
4202xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004203 int q, ql;
4204 int r, rl;
4205 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004206 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004207 int inputid;
4208
4209 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004210
Owen Taylor3473f882001-02-23 17:55:21 +00004211 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004212 len = 0;
4213 size = XML_PARSER_BUFFER_SIZE;
4214 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4215 if (buf == NULL) {
4216 xmlErrMemory(ctxt, NULL);
4217 return;
4218 }
Owen Taylor3473f882001-02-23 17:55:21 +00004219 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004220 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004221 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004222 if (q == 0)
4223 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004224 if (!IS_CHAR(q)) {
4225 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4226 "xmlParseComment: invalid xmlChar value %d\n",
4227 q);
4228 xmlFree (buf);
4229 return;
4230 }
Owen Taylor3473f882001-02-23 17:55:21 +00004231 NEXTL(ql);
4232 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004233 if (r == 0)
4234 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004235 if (!IS_CHAR(r)) {
4236 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4237 "xmlParseComment: invalid xmlChar value %d\n",
4238 q);
4239 xmlFree (buf);
4240 return;
4241 }
Owen Taylor3473f882001-02-23 17:55:21 +00004242 NEXTL(rl);
4243 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004244 if (cur == 0)
4245 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004246 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004247 ((cur != '>') ||
4248 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004249 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004250 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004251 }
4252 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004253 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004254 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004255 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4256 if (new_buf == NULL) {
4257 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004258 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 return;
4260 }
William M. Bracka3215c72004-07-31 16:24:01 +00004261 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004262 }
4263 COPY_BUF(ql,buf,len,q);
4264 q = r;
4265 ql = rl;
4266 r = cur;
4267 rl = l;
4268
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
4273 }
4274 NEXTL(l);
4275 cur = CUR_CHAR(l);
4276 if (cur == 0) {
4277 SHRINK;
4278 GROW;
4279 cur = CUR_CHAR(l);
4280 }
4281 }
4282 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004283 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004284 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004285 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004286 } else if (!IS_CHAR(cur)) {
4287 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4288 "xmlParseComment: invalid xmlChar value %d\n",
4289 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004290 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004291 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004292 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4293 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004294 }
4295 NEXT;
4296 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4297 (!ctxt->disableSAX))
4298 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004299 }
Daniel Veillardda629342007-08-01 07:49:06 +00004300 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004301 return;
4302not_terminated:
4303 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4304 "Comment not terminated\n", NULL);
4305 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004306 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004307}
Daniel Veillardda629342007-08-01 07:49:06 +00004308
Daniel Veillard4c778d82005-01-23 17:37:44 +00004309/**
4310 * xmlParseComment:
4311 * @ctxt: an XML parser context
4312 *
4313 * Skip an XML (SGML) comment <!-- .... -->
4314 * The spec says that "For compatibility, the string "--" (double-hyphen)
4315 * must not occur within comments. "
4316 *
4317 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4318 */
4319void
4320xmlParseComment(xmlParserCtxtPtr ctxt) {
4321 xmlChar *buf = NULL;
4322 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004323 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004324 xmlParserInputState state;
4325 const xmlChar *in;
4326 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004327 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004328
4329 /*
4330 * Check that there is a comment right here.
4331 */
4332 if ((RAW != '<') || (NXT(1) != '!') ||
4333 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004334 state = ctxt->instate;
4335 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004336 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004337 SKIP(4);
4338 SHRINK;
4339 GROW;
4340
4341 /*
4342 * Accelerated common case where input don't need to be
4343 * modified before passing it to the handler.
4344 */
4345 in = ctxt->input->cur;
4346 do {
4347 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004348 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004349 ctxt->input->line++; ctxt->input->col = 1;
4350 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004351 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004352 }
4353get_more:
4354 ccol = ctxt->input->col;
4355 while (((*in > '-') && (*in <= 0x7F)) ||
4356 ((*in >= 0x20) && (*in < '-')) ||
4357 (*in == 0x09)) {
4358 in++;
4359 ccol++;
4360 }
4361 ctxt->input->col = ccol;
4362 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004363 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004364 ctxt->input->line++; ctxt->input->col = 1;
4365 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004366 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004367 goto get_more;
4368 }
4369 nbchar = in - ctxt->input->cur;
4370 /*
4371 * save current set of data
4372 */
4373 if (nbchar > 0) {
4374 if ((ctxt->sax != NULL) &&
4375 (ctxt->sax->comment != NULL)) {
4376 if (buf == NULL) {
4377 if ((*in == '-') && (in[1] == '-'))
4378 size = nbchar + 1;
4379 else
4380 size = XML_PARSER_BUFFER_SIZE + nbchar;
4381 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4382 if (buf == NULL) {
4383 xmlErrMemory(ctxt, NULL);
4384 ctxt->instate = state;
4385 return;
4386 }
4387 len = 0;
4388 } else if (len + nbchar + 1 >= size) {
4389 xmlChar *new_buf;
4390 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4391 new_buf = (xmlChar *) xmlRealloc(buf,
4392 size * sizeof(xmlChar));
4393 if (new_buf == NULL) {
4394 xmlFree (buf);
4395 xmlErrMemory(ctxt, NULL);
4396 ctxt->instate = state;
4397 return;
4398 }
4399 buf = new_buf;
4400 }
4401 memcpy(&buf[len], ctxt->input->cur, nbchar);
4402 len += nbchar;
4403 buf[len] = 0;
4404 }
4405 }
4406 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004407 if (*in == 0xA) {
4408 in++;
4409 ctxt->input->line++; ctxt->input->col = 1;
4410 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004411 if (*in == 0xD) {
4412 in++;
4413 if (*in == 0xA) {
4414 ctxt->input->cur = in;
4415 in++;
4416 ctxt->input->line++; ctxt->input->col = 1;
4417 continue; /* while */
4418 }
4419 in--;
4420 }
4421 SHRINK;
4422 GROW;
4423 in = ctxt->input->cur;
4424 if (*in == '-') {
4425 if (in[1] == '-') {
4426 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004427 if (ctxt->input->id != inputid) {
4428 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4429 "comment doesn't start and stop in the same entity\n");
4430 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004431 SKIP(3);
4432 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4433 (!ctxt->disableSAX)) {
4434 if (buf != NULL)
4435 ctxt->sax->comment(ctxt->userData, buf);
4436 else
4437 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4438 }
4439 if (buf != NULL)
4440 xmlFree(buf);
4441 ctxt->instate = state;
4442 return;
4443 }
4444 if (buf != NULL)
4445 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4446 "Comment not terminated \n<!--%.50s\n",
4447 buf);
4448 else
4449 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4450 "Comment not terminated \n", NULL);
4451 in++;
4452 ctxt->input->col++;
4453 }
4454 in++;
4455 ctxt->input->col++;
4456 goto get_more;
4457 }
4458 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4459 xmlParseCommentComplex(ctxt, buf, len, size);
4460 ctxt->instate = state;
4461 return;
4462}
4463
Owen Taylor3473f882001-02-23 17:55:21 +00004464
4465/**
4466 * xmlParsePITarget:
4467 * @ctxt: an XML parser context
4468 *
4469 * parse the name of a PI
4470 *
4471 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4472 *
4473 * Returns the PITarget name or NULL
4474 */
4475
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004476const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004477xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004478 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004479
4480 name = xmlParseName(ctxt);
4481 if ((name != NULL) &&
4482 ((name[0] == 'x') || (name[0] == 'X')) &&
4483 ((name[1] == 'm') || (name[1] == 'M')) &&
4484 ((name[2] == 'l') || (name[2] == 'L'))) {
4485 int i;
4486 if ((name[0] == 'x') && (name[1] == 'm') &&
4487 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004489 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 return(name);
4491 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004492 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004493 return(name);
4494 }
4495 for (i = 0;;i++) {
4496 if (xmlW3CPIs[i] == NULL) break;
4497 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4498 return(name);
4499 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004500 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4501 "xmlParsePITarget: invalid name prefix 'xml'\n",
4502 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 }
4504 return(name);
4505}
4506
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004507#ifdef LIBXML_CATALOG_ENABLED
4508/**
4509 * xmlParseCatalogPI:
4510 * @ctxt: an XML parser context
4511 * @catalog: the PI value string
4512 *
4513 * parse an XML Catalog Processing Instruction.
4514 *
4515 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4516 *
4517 * Occurs only if allowed by the user and if happening in the Misc
4518 * part of the document before any doctype informations
4519 * This will add the given catalog to the parsing context in order
4520 * to be used if there is a resolution need further down in the document
4521 */
4522
4523static void
4524xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4525 xmlChar *URL = NULL;
4526 const xmlChar *tmp, *base;
4527 xmlChar marker;
4528
4529 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004530 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004531 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4532 goto error;
4533 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004534 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004535 if (*tmp != '=') {
4536 return;
4537 }
4538 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004539 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004540 marker = *tmp;
4541 if ((marker != '\'') && (marker != '"'))
4542 goto error;
4543 tmp++;
4544 base = tmp;
4545 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4546 if (*tmp == 0)
4547 goto error;
4548 URL = xmlStrndup(base, tmp - base);
4549 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004550 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004551 if (*tmp != 0)
4552 goto error;
4553
4554 if (URL != NULL) {
4555 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4556 xmlFree(URL);
4557 }
4558 return;
4559
4560error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004561 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4562 "Catalog PI syntax error: %s\n",
4563 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004564 if (URL != NULL)
4565 xmlFree(URL);
4566}
4567#endif
4568
Owen Taylor3473f882001-02-23 17:55:21 +00004569/**
4570 * xmlParsePI:
4571 * @ctxt: an XML parser context
4572 *
4573 * parse an XML Processing Instruction.
4574 *
4575 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4576 *
4577 * The processing is transfered to SAX once parsed.
4578 */
4579
4580void
4581xmlParsePI(xmlParserCtxtPtr ctxt) {
4582 xmlChar *buf = NULL;
4583 int len = 0;
4584 int size = XML_PARSER_BUFFER_SIZE;
4585 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004586 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004587 xmlParserInputState state;
4588 int count = 0;
4589
4590 if ((RAW == '<') && (NXT(1) == '?')) {
4591 xmlParserInputPtr input = ctxt->input;
4592 state = ctxt->instate;
4593 ctxt->instate = XML_PARSER_PI;
4594 /*
4595 * this is a Processing Instruction.
4596 */
4597 SKIP(2);
4598 SHRINK;
4599
4600 /*
4601 * Parse the target name and check for special support like
4602 * namespace.
4603 */
4604 target = xmlParsePITarget(ctxt);
4605 if (target != NULL) {
4606 if ((RAW == '?') && (NXT(1) == '>')) {
4607 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4609 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004610 }
4611 SKIP(2);
4612
4613 /*
4614 * SAX: PI detected.
4615 */
4616 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4617 (ctxt->sax->processingInstruction != NULL))
4618 ctxt->sax->processingInstruction(ctxt->userData,
4619 target, NULL);
4620 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004621 return;
4622 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004623 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004624 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004625 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004626 ctxt->instate = state;
4627 return;
4628 }
4629 cur = CUR;
4630 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004631 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4632 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004633 }
4634 SKIP_BLANKS;
4635 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004636 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004637 ((cur != '?') || (NXT(1) != '>'))) {
4638 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004639 xmlChar *tmp;
4640
Owen Taylor3473f882001-02-23 17:55:21 +00004641 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004642 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4643 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004644 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004645 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004646 ctxt->instate = state;
4647 return;
4648 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004649 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004650 }
4651 count++;
4652 if (count > 50) {
4653 GROW;
4654 count = 0;
4655 }
4656 COPY_BUF(l,buf,len,cur);
4657 NEXTL(l);
4658 cur = CUR_CHAR(l);
4659 if (cur == 0) {
4660 SHRINK;
4661 GROW;
4662 cur = CUR_CHAR(l);
4663 }
4664 }
4665 buf[len] = 0;
4666 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004667 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4668 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004669 } else {
4670 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004671 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4672 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004673 }
4674 SKIP(2);
4675
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004676#ifdef LIBXML_CATALOG_ENABLED
4677 if (((state == XML_PARSER_MISC) ||
4678 (state == XML_PARSER_START)) &&
4679 (xmlStrEqual(target, XML_CATALOG_PI))) {
4680 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4681 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4682 (allow == XML_CATA_ALLOW_ALL))
4683 xmlParseCatalogPI(ctxt, buf);
4684 }
4685#endif
4686
4687
Owen Taylor3473f882001-02-23 17:55:21 +00004688 /*
4689 * SAX: PI detected.
4690 */
4691 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4692 (ctxt->sax->processingInstruction != NULL))
4693 ctxt->sax->processingInstruction(ctxt->userData,
4694 target, buf);
4695 }
4696 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004698 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004699 }
4700 ctxt->instate = state;
4701 }
4702}
4703
4704/**
4705 * xmlParseNotationDecl:
4706 * @ctxt: an XML parser context
4707 *
4708 * parse a notation declaration
4709 *
4710 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4711 *
4712 * Hence there is actually 3 choices:
4713 * 'PUBLIC' S PubidLiteral
4714 * 'PUBLIC' S PubidLiteral S SystemLiteral
4715 * and 'SYSTEM' S SystemLiteral
4716 *
4717 * See the NOTE on xmlParseExternalID().
4718 */
4719
4720void
4721xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004722 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 xmlChar *Pubid;
4724 xmlChar *Systemid;
4725
Daniel Veillarda07050d2003-10-19 14:46:32 +00004726 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004727 xmlParserInputPtr input = ctxt->input;
4728 SHRINK;
4729 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004730 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004731 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4732 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004733 return;
4734 }
4735 SKIP_BLANKS;
4736
Daniel Veillard76d66f42001-05-16 21:05:17 +00004737 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004739 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 return;
4741 }
William M. Brack76e95df2003-10-18 16:20:14 +00004742 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004744 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004745 return;
4746 }
4747 SKIP_BLANKS;
4748
4749 /*
4750 * Parse the IDs.
4751 */
4752 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4753 SKIP_BLANKS;
4754
4755 if (RAW == '>') {
4756 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004757 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4758 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004759 }
4760 NEXT;
4761 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4762 (ctxt->sax->notationDecl != NULL))
4763 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4764 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004765 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004766 }
Owen Taylor3473f882001-02-23 17:55:21 +00004767 if (Systemid != NULL) xmlFree(Systemid);
4768 if (Pubid != NULL) xmlFree(Pubid);
4769 }
4770}
4771
4772/**
4773 * xmlParseEntityDecl:
4774 * @ctxt: an XML parser context
4775 *
4776 * parse <!ENTITY declarations
4777 *
4778 * [70] EntityDecl ::= GEDecl | PEDecl
4779 *
4780 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4781 *
4782 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4783 *
4784 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4785 *
4786 * [74] PEDef ::= EntityValue | ExternalID
4787 *
4788 * [76] NDataDecl ::= S 'NDATA' S Name
4789 *
4790 * [ VC: Notation Declared ]
4791 * The Name must match the declared name of a notation.
4792 */
4793
4794void
4795xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004796 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004797 xmlChar *value = NULL;
4798 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004799 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004800 int isParameter = 0;
4801 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004802 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004803
Daniel Veillard4c778d82005-01-23 17:37:44 +00004804 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004805 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004806 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004807 SHRINK;
4808 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004809 skipped = SKIP_BLANKS;
4810 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004811 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4812 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004813 }
Owen Taylor3473f882001-02-23 17:55:21 +00004814
4815 if (RAW == '%') {
4816 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004817 skipped = SKIP_BLANKS;
4818 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004819 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4820 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004821 }
Owen Taylor3473f882001-02-23 17:55:21 +00004822 isParameter = 1;
4823 }
4824
Daniel Veillard76d66f42001-05-16 21:05:17 +00004825 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004826 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004827 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4828 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004829 return;
4830 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004831 skipped = SKIP_BLANKS;
4832 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004833 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4834 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836
Daniel Veillardf5582f12002-06-11 10:08:16 +00004837 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004838 /*
4839 * handle the various case of definitions...
4840 */
4841 if (isParameter) {
4842 if ((RAW == '"') || (RAW == '\'')) {
4843 value = xmlParseEntityValue(ctxt, &orig);
4844 if (value) {
4845 if ((ctxt->sax != NULL) &&
4846 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4847 ctxt->sax->entityDecl(ctxt->userData, name,
4848 XML_INTERNAL_PARAMETER_ENTITY,
4849 NULL, NULL, value);
4850 }
4851 } else {
4852 URI = xmlParseExternalID(ctxt, &literal, 1);
4853 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004854 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004855 }
4856 if (URI) {
4857 xmlURIPtr uri;
4858
4859 uri = xmlParseURI((const char *) URI);
4860 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004861 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4862 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004863 /*
4864 * This really ought to be a well formedness error
4865 * but the XML Core WG decided otherwise c.f. issue
4866 * E26 of the XML erratas.
4867 */
Owen Taylor3473f882001-02-23 17:55:21 +00004868 } else {
4869 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004870 /*
4871 * Okay this is foolish to block those but not
4872 * invalid URIs.
4873 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004874 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004875 } else {
4876 if ((ctxt->sax != NULL) &&
4877 (!ctxt->disableSAX) &&
4878 (ctxt->sax->entityDecl != NULL))
4879 ctxt->sax->entityDecl(ctxt->userData, name,
4880 XML_EXTERNAL_PARAMETER_ENTITY,
4881 literal, URI, NULL);
4882 }
4883 xmlFreeURI(uri);
4884 }
4885 }
4886 }
4887 } else {
4888 if ((RAW == '"') || (RAW == '\'')) {
4889 value = xmlParseEntityValue(ctxt, &orig);
4890 if ((ctxt->sax != NULL) &&
4891 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4892 ctxt->sax->entityDecl(ctxt->userData, name,
4893 XML_INTERNAL_GENERAL_ENTITY,
4894 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004895 /*
4896 * For expat compatibility in SAX mode.
4897 */
4898 if ((ctxt->myDoc == NULL) ||
4899 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4900 if (ctxt->myDoc == NULL) {
4901 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004902 if (ctxt->myDoc == NULL) {
4903 xmlErrMemory(ctxt, "New Doc failed");
4904 return;
4905 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004906 }
4907 if (ctxt->myDoc->intSubset == NULL)
4908 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4909 BAD_CAST "fake", NULL, NULL);
4910
Daniel Veillard1af9a412003-08-20 22:54:39 +00004911 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4912 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004913 }
Owen Taylor3473f882001-02-23 17:55:21 +00004914 } else {
4915 URI = xmlParseExternalID(ctxt, &literal, 1);
4916 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004917 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004918 }
4919 if (URI) {
4920 xmlURIPtr uri;
4921
4922 uri = xmlParseURI((const char *)URI);
4923 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004924 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4925 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004926 /*
4927 * This really ought to be a well formedness error
4928 * but the XML Core WG decided otherwise c.f. issue
4929 * E26 of the XML erratas.
4930 */
Owen Taylor3473f882001-02-23 17:55:21 +00004931 } else {
4932 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004933 /*
4934 * Okay this is foolish to block those but not
4935 * invalid URIs.
4936 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004937 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004938 }
4939 xmlFreeURI(uri);
4940 }
4941 }
William M. Brack76e95df2003-10-18 16:20:14 +00004942 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004943 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4944 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004945 }
4946 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004947 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004948 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004949 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004950 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4951 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004952 }
4953 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004954 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4956 (ctxt->sax->unparsedEntityDecl != NULL))
4957 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4958 literal, URI, ndata);
4959 } else {
4960 if ((ctxt->sax != NULL) &&
4961 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4962 ctxt->sax->entityDecl(ctxt->userData, name,
4963 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4964 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004965 /*
4966 * For expat compatibility in SAX mode.
4967 * assuming the entity repalcement was asked for
4968 */
4969 if ((ctxt->replaceEntities != 0) &&
4970 ((ctxt->myDoc == NULL) ||
4971 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4972 if (ctxt->myDoc == NULL) {
4973 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004974 if (ctxt->myDoc == NULL) {
4975 xmlErrMemory(ctxt, "New Doc failed");
4976 return;
4977 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004978 }
4979
4980 if (ctxt->myDoc->intSubset == NULL)
4981 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4982 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004983 xmlSAX2EntityDecl(ctxt, name,
4984 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4985 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004986 }
Owen Taylor3473f882001-02-23 17:55:21 +00004987 }
4988 }
4989 }
4990 SKIP_BLANKS;
4991 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004992 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004993 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004994 } else {
4995 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004996 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4997 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004998 }
4999 NEXT;
5000 }
5001 if (orig != NULL) {
5002 /*
5003 * Ugly mechanism to save the raw entity value.
5004 */
5005 xmlEntityPtr cur = NULL;
5006
5007 if (isParameter) {
5008 if ((ctxt->sax != NULL) &&
5009 (ctxt->sax->getParameterEntity != NULL))
5010 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5011 } else {
5012 if ((ctxt->sax != NULL) &&
5013 (ctxt->sax->getEntity != NULL))
5014 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005015 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005016 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005017 }
Owen Taylor3473f882001-02-23 17:55:21 +00005018 }
5019 if (cur != NULL) {
5020 if (cur->orig != NULL)
5021 xmlFree(orig);
5022 else
5023 cur->orig = orig;
5024 } else
5025 xmlFree(orig);
5026 }
Owen Taylor3473f882001-02-23 17:55:21 +00005027 if (value != NULL) xmlFree(value);
5028 if (URI != NULL) xmlFree(URI);
5029 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005030 }
5031}
5032
5033/**
5034 * xmlParseDefaultDecl:
5035 * @ctxt: an XML parser context
5036 * @value: Receive a possible fixed default value for the attribute
5037 *
5038 * Parse an attribute default declaration
5039 *
5040 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5041 *
5042 * [ VC: Required Attribute ]
5043 * if the default declaration is the keyword #REQUIRED, then the
5044 * attribute must be specified for all elements of the type in the
5045 * attribute-list declaration.
5046 *
5047 * [ VC: Attribute Default Legal ]
5048 * The declared default value must meet the lexical constraints of
5049 * the declared attribute type c.f. xmlValidateAttributeDecl()
5050 *
5051 * [ VC: Fixed Attribute Default ]
5052 * if an attribute has a default value declared with the #FIXED
5053 * keyword, instances of that attribute must match the default value.
5054 *
5055 * [ WFC: No < in Attribute Values ]
5056 * handled in xmlParseAttValue()
5057 *
5058 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5059 * or XML_ATTRIBUTE_FIXED.
5060 */
5061
5062int
5063xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5064 int val;
5065 xmlChar *ret;
5066
5067 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005068 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005069 SKIP(9);
5070 return(XML_ATTRIBUTE_REQUIRED);
5071 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005072 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005073 SKIP(8);
5074 return(XML_ATTRIBUTE_IMPLIED);
5075 }
5076 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005077 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005078 SKIP(6);
5079 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005080 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005081 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5082 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005083 }
5084 SKIP_BLANKS;
5085 }
5086 ret = xmlParseAttValue(ctxt);
5087 ctxt->instate = XML_PARSER_DTD;
5088 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005089 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005090 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005091 } else
5092 *value = ret;
5093 return(val);
5094}
5095
5096/**
5097 * xmlParseNotationType:
5098 * @ctxt: an XML parser context
5099 *
5100 * parse an Notation attribute type.
5101 *
5102 * Note: the leading 'NOTATION' S part has already being parsed...
5103 *
5104 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5105 *
5106 * [ VC: Notation Attributes ]
5107 * Values of this type must match one of the notation names included
5108 * in the declaration; all notation names in the declaration must be declared.
5109 *
5110 * Returns: the notation attribute tree built while parsing
5111 */
5112
5113xmlEnumerationPtr
5114xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005115 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005116 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5117
5118 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005119 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005120 return(NULL);
5121 }
5122 SHRINK;
5123 do {
5124 NEXT;
5125 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005126 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5129 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005130 return(ret);
5131 }
5132 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 if (cur == NULL) return(ret);
5134 if (last == NULL) ret = last = cur;
5135 else {
5136 last->next = cur;
5137 last = cur;
5138 }
5139 SKIP_BLANKS;
5140 } while (RAW == '|');
5141 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005142 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005143 if ((last != NULL) && (last != ret))
5144 xmlFreeEnumeration(last);
5145 return(ret);
5146 }
5147 NEXT;
5148 return(ret);
5149}
5150
5151/**
5152 * xmlParseEnumerationType:
5153 * @ctxt: an XML parser context
5154 *
5155 * parse an Enumeration attribute type.
5156 *
5157 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5158 *
5159 * [ VC: Enumeration ]
5160 * Values of this type must match one of the Nmtoken tokens in
5161 * the declaration
5162 *
5163 * Returns: the enumeration attribute tree built while parsing
5164 */
5165
5166xmlEnumerationPtr
5167xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5168 xmlChar *name;
5169 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5170
5171 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005172 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005173 return(NULL);
5174 }
5175 SHRINK;
5176 do {
5177 NEXT;
5178 SKIP_BLANKS;
5179 name = xmlParseNmtoken(ctxt);
5180 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005181 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005182 return(ret);
5183 }
5184 cur = xmlCreateEnumeration(name);
5185 xmlFree(name);
5186 if (cur == NULL) return(ret);
5187 if (last == NULL) ret = last = cur;
5188 else {
5189 last->next = cur;
5190 last = cur;
5191 }
5192 SKIP_BLANKS;
5193 } while (RAW == '|');
5194 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005195 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 return(ret);
5197 }
5198 NEXT;
5199 return(ret);
5200}
5201
5202/**
5203 * xmlParseEnumeratedType:
5204 * @ctxt: an XML parser context
5205 * @tree: the enumeration tree built while parsing
5206 *
5207 * parse an Enumerated attribute type.
5208 *
5209 * [57] EnumeratedType ::= NotationType | Enumeration
5210 *
5211 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5212 *
5213 *
5214 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5215 */
5216
5217int
5218xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005219 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005220 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005221 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005222 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5223 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005224 return(0);
5225 }
5226 SKIP_BLANKS;
5227 *tree = xmlParseNotationType(ctxt);
5228 if (*tree == NULL) return(0);
5229 return(XML_ATTRIBUTE_NOTATION);
5230 }
5231 *tree = xmlParseEnumerationType(ctxt);
5232 if (*tree == NULL) return(0);
5233 return(XML_ATTRIBUTE_ENUMERATION);
5234}
5235
5236/**
5237 * xmlParseAttributeType:
5238 * @ctxt: an XML parser context
5239 * @tree: the enumeration tree built while parsing
5240 *
5241 * parse the Attribute list def for an element
5242 *
5243 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5244 *
5245 * [55] StringType ::= 'CDATA'
5246 *
5247 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5248 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5249 *
5250 * Validity constraints for attribute values syntax are checked in
5251 * xmlValidateAttributeValue()
5252 *
5253 * [ VC: ID ]
5254 * Values of type ID must match the Name production. A name must not
5255 * appear more than once in an XML document as a value of this type;
5256 * i.e., ID values must uniquely identify the elements which bear them.
5257 *
5258 * [ VC: One ID per Element Type ]
5259 * No element type may have more than one ID attribute specified.
5260 *
5261 * [ VC: ID Attribute Default ]
5262 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5263 *
5264 * [ VC: IDREF ]
5265 * Values of type IDREF must match the Name production, and values
5266 * of type IDREFS must match Names; each IDREF Name must match the value
5267 * of an ID attribute on some element in the XML document; i.e. IDREF
5268 * values must match the value of some ID attribute.
5269 *
5270 * [ VC: Entity Name ]
5271 * Values of type ENTITY must match the Name production, values
5272 * of type ENTITIES must match Names; each Entity Name must match the
5273 * name of an unparsed entity declared in the DTD.
5274 *
5275 * [ VC: Name Token ]
5276 * Values of type NMTOKEN must match the Nmtoken production; values
5277 * of type NMTOKENS must match Nmtokens.
5278 *
5279 * Returns the attribute type
5280 */
5281int
5282xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5283 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005284 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005285 SKIP(5);
5286 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005287 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005288 SKIP(6);
5289 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005290 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005291 SKIP(5);
5292 return(XML_ATTRIBUTE_IDREF);
5293 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5294 SKIP(2);
5295 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005296 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005297 SKIP(6);
5298 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005299 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005300 SKIP(8);
5301 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005302 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005303 SKIP(8);
5304 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005305 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005306 SKIP(7);
5307 return(XML_ATTRIBUTE_NMTOKEN);
5308 }
5309 return(xmlParseEnumeratedType(ctxt, tree));
5310}
5311
5312/**
5313 * xmlParseAttributeListDecl:
5314 * @ctxt: an XML parser context
5315 *
5316 * : parse the Attribute list def for an element
5317 *
5318 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5319 *
5320 * [53] AttDef ::= S Name S AttType S DefaultDecl
5321 *
5322 */
5323void
5324xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005325 const xmlChar *elemName;
5326 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005327 xmlEnumerationPtr tree;
5328
Daniel Veillarda07050d2003-10-19 14:46:32 +00005329 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005330 xmlParserInputPtr input = ctxt->input;
5331
5332 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005333 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005335 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005336 }
5337 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005338 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005339 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005340 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5341 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005342 return;
5343 }
5344 SKIP_BLANKS;
5345 GROW;
5346 while (RAW != '>') {
5347 const xmlChar *check = CUR_PTR;
5348 int type;
5349 int def;
5350 xmlChar *defaultValue = NULL;
5351
5352 GROW;
5353 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005354 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005355 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5357 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005358 break;
5359 }
5360 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005361 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005362 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005363 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005364 break;
5365 }
5366 SKIP_BLANKS;
5367
5368 type = xmlParseAttributeType(ctxt, &tree);
5369 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005370 break;
5371 }
5372
5373 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005374 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5376 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005377 if (tree != NULL)
5378 xmlFreeEnumeration(tree);
5379 break;
5380 }
5381 SKIP_BLANKS;
5382
5383 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5384 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005385 if (defaultValue != NULL)
5386 xmlFree(defaultValue);
5387 if (tree != NULL)
5388 xmlFreeEnumeration(tree);
5389 break;
5390 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005391 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5392 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005393
5394 GROW;
5395 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005396 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005397 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005398 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005399 if (defaultValue != NULL)
5400 xmlFree(defaultValue);
5401 if (tree != NULL)
5402 xmlFreeEnumeration(tree);
5403 break;
5404 }
5405 SKIP_BLANKS;
5406 }
5407 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005408 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5409 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005410 if (defaultValue != NULL)
5411 xmlFree(defaultValue);
5412 if (tree != NULL)
5413 xmlFreeEnumeration(tree);
5414 break;
5415 }
5416 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5417 (ctxt->sax->attributeDecl != NULL))
5418 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5419 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005420 else if (tree != NULL)
5421 xmlFreeEnumeration(tree);
5422
5423 if ((ctxt->sax2) && (defaultValue != NULL) &&
5424 (def != XML_ATTRIBUTE_IMPLIED) &&
5425 (def != XML_ATTRIBUTE_REQUIRED)) {
5426 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5427 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005428 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005429 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5430 }
Owen Taylor3473f882001-02-23 17:55:21 +00005431 if (defaultValue != NULL)
5432 xmlFree(defaultValue);
5433 GROW;
5434 }
5435 if (RAW == '>') {
5436 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005437 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5438 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005439 }
5440 NEXT;
5441 }
Owen Taylor3473f882001-02-23 17:55:21 +00005442 }
5443}
5444
5445/**
5446 * xmlParseElementMixedContentDecl:
5447 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005448 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005449 *
5450 * parse the declaration for a Mixed Element content
5451 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5452 *
5453 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5454 * '(' S? '#PCDATA' S? ')'
5455 *
5456 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5457 *
5458 * [ VC: No Duplicate Types ]
5459 * The same name must not appear more than once in a single
5460 * mixed-content declaration.
5461 *
5462 * returns: the list of the xmlElementContentPtr describing the element choices
5463 */
5464xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005466 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005467 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005468
5469 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005470 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005471 SKIP(7);
5472 SKIP_BLANKS;
5473 SHRINK;
5474 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005475 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005476 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5477"Element content declaration doesn't start and stop in the same entity\n",
5478 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005479 }
Owen Taylor3473f882001-02-23 17:55:21 +00005480 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005481 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005482 if (ret == NULL)
5483 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 if (RAW == '*') {
5485 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5486 NEXT;
5487 }
5488 return(ret);
5489 }
5490 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005491 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005492 if (ret == NULL) return(NULL);
5493 }
5494 while (RAW == '|') {
5495 NEXT;
5496 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005497 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (ret == NULL) return(NULL);
5499 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005500 if (cur != NULL)
5501 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005502 cur = ret;
5503 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005504 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005505 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005506 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005507 if (n->c1 != NULL)
5508 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005509 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005510 if (n != NULL)
5511 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005512 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005513 }
5514 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005515 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005516 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005517 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005518 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005519 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005520 return(NULL);
5521 }
5522 SKIP_BLANKS;
5523 GROW;
5524 }
5525 if ((RAW == ')') && (NXT(1) == '*')) {
5526 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005527 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005528 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005529 if (cur->c2 != NULL)
5530 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
5532 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005533 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005534 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5535"Element content declaration doesn't start and stop in the same entity\n",
5536 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005537 }
Owen Taylor3473f882001-02-23 17:55:21 +00005538 SKIP(2);
5539 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005540 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005541 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 return(NULL);
5543 }
5544
5545 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005546 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 }
5548 return(ret);
5549}
5550
5551/**
5552 * xmlParseElementChildrenContentDecl:
5553 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005554 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005555 *
5556 * parse the declaration for a Mixed Element content
5557 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5558 *
5559 *
5560 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5561 *
5562 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5563 *
5564 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5565 *
5566 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5567 *
5568 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5569 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005570 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005571 * opening or closing parentheses in a choice, seq, or Mixed
5572 * construct is contained in the replacement text for a parameter
5573 * entity, both must be contained in the same replacement text. For
5574 * interoperability, if a parameter-entity reference appears in a
5575 * choice, seq, or Mixed construct, its replacement text should not
5576 * be empty, and neither the first nor last non-blank character of
5577 * the replacement text should be a connector (| or ,).
5578 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005579 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005580 * hierarchy.
5581 */
5582xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005583xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005584 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005585 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005586 xmlChar type = 0;
5587
5588 SKIP_BLANKS;
5589 GROW;
5590 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005591 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005592
Owen Taylor3473f882001-02-23 17:55:21 +00005593 /* Recurse on first child */
5594 NEXT;
5595 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005596 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005597 SKIP_BLANKS;
5598 GROW;
5599 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005600 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005601 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005602 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005603 return(NULL);
5604 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005605 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005606 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005607 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005608 return(NULL);
5609 }
Owen Taylor3473f882001-02-23 17:55:21 +00005610 GROW;
5611 if (RAW == '?') {
5612 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5613 NEXT;
5614 } else if (RAW == '*') {
5615 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5616 NEXT;
5617 } else if (RAW == '+') {
5618 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5619 NEXT;
5620 } else {
5621 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5622 }
Owen Taylor3473f882001-02-23 17:55:21 +00005623 GROW;
5624 }
5625 SKIP_BLANKS;
5626 SHRINK;
5627 while (RAW != ')') {
5628 /*
5629 * Each loop we parse one separator and one element.
5630 */
5631 if (RAW == ',') {
5632 if (type == 0) type = CUR;
5633
5634 /*
5635 * Detect "Name | Name , Name" error
5636 */
5637 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005638 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005639 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005640 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005641 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005642 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005643 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005644 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005645 return(NULL);
5646 }
5647 NEXT;
5648
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005649 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005650 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005651 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005652 xmlFreeDocElementContent(ctxt->myDoc, last);
5653 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005654 return(NULL);
5655 }
5656 if (last == NULL) {
5657 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005658 if (ret != NULL)
5659 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005660 ret = cur = op;
5661 } else {
5662 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005663 if (op != NULL)
5664 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005665 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005666 if (last != NULL)
5667 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005668 cur =op;
5669 last = NULL;
5670 }
5671 } else if (RAW == '|') {
5672 if (type == 0) type = CUR;
5673
5674 /*
5675 * Detect "Name , Name | Name" error
5676 */
5677 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005678 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005679 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005680 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005681 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005682 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005683 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005684 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005685 return(NULL);
5686 }
5687 NEXT;
5688
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005689 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005690 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005691 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005692 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005693 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005694 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005695 return(NULL);
5696 }
5697 if (last == NULL) {
5698 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005699 if (ret != NULL)
5700 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005701 ret = cur = op;
5702 } else {
5703 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005704 if (op != NULL)
5705 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005706 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005707 if (last != NULL)
5708 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005709 cur =op;
5710 last = NULL;
5711 }
5712 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005713 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005714 if ((last != NULL) && (last != ret))
5715 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005717 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005718 return(NULL);
5719 }
5720 GROW;
5721 SKIP_BLANKS;
5722 GROW;
5723 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005724 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005725 /* Recurse on second child */
5726 NEXT;
5727 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005728 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 SKIP_BLANKS;
5730 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005731 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005732 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005733 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005734 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005735 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 return(NULL);
5737 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005738 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005739 if (last == NULL) {
5740 if (ret != NULL)
5741 xmlFreeDocElementContent(ctxt->myDoc, ret);
5742 return(NULL);
5743 }
Owen Taylor3473f882001-02-23 17:55:21 +00005744 if (RAW == '?') {
5745 last->ocur = XML_ELEMENT_CONTENT_OPT;
5746 NEXT;
5747 } else if (RAW == '*') {
5748 last->ocur = XML_ELEMENT_CONTENT_MULT;
5749 NEXT;
5750 } else if (RAW == '+') {
5751 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5752 NEXT;
5753 } else {
5754 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5755 }
5756 }
5757 SKIP_BLANKS;
5758 GROW;
5759 }
5760 if ((cur != NULL) && (last != NULL)) {
5761 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005762 if (last != NULL)
5763 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005764 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005766 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5767"Element content declaration doesn't start and stop in the same entity\n",
5768 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005769 }
Owen Taylor3473f882001-02-23 17:55:21 +00005770 NEXT;
5771 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005772 if (ret != NULL) {
5773 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5774 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5775 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5776 else
5777 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5778 }
Owen Taylor3473f882001-02-23 17:55:21 +00005779 NEXT;
5780 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005781 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005782 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005783 cur = ret;
5784 /*
5785 * Some normalization:
5786 * (a | b* | c?)* == (a | b | c)*
5787 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005788 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005789 if ((cur->c1 != NULL) &&
5790 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5791 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5792 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5793 if ((cur->c2 != NULL) &&
5794 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5795 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5796 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5797 cur = cur->c2;
5798 }
5799 }
Owen Taylor3473f882001-02-23 17:55:21 +00005800 NEXT;
5801 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005802 if (ret != NULL) {
5803 int found = 0;
5804
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005805 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5806 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5807 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005808 else
5809 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005810 /*
5811 * Some normalization:
5812 * (a | b*)+ == (a | b)*
5813 * (a | b?)+ == (a | b)*
5814 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005815 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005816 if ((cur->c1 != NULL) &&
5817 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5818 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5819 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5820 found = 1;
5821 }
5822 if ((cur->c2 != NULL) &&
5823 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5824 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5825 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5826 found = 1;
5827 }
5828 cur = cur->c2;
5829 }
5830 if (found)
5831 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5832 }
Owen Taylor3473f882001-02-23 17:55:21 +00005833 NEXT;
5834 }
5835 return(ret);
5836}
5837
5838/**
5839 * xmlParseElementContentDecl:
5840 * @ctxt: an XML parser context
5841 * @name: the name of the element being defined.
5842 * @result: the Element Content pointer will be stored here if any
5843 *
5844 * parse the declaration for an Element content either Mixed or Children,
5845 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5846 *
5847 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5848 *
5849 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5850 */
5851
5852int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005853xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005854 xmlElementContentPtr *result) {
5855
5856 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005857 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005858 int res;
5859
5860 *result = NULL;
5861
5862 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005863 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005864 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005865 return(-1);
5866 }
5867 NEXT;
5868 GROW;
5869 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005870 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005871 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005872 res = XML_ELEMENT_TYPE_MIXED;
5873 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005874 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005875 res = XML_ELEMENT_TYPE_ELEMENT;
5876 }
Owen Taylor3473f882001-02-23 17:55:21 +00005877 SKIP_BLANKS;
5878 *result = tree;
5879 return(res);
5880}
5881
5882/**
5883 * xmlParseElementDecl:
5884 * @ctxt: an XML parser context
5885 *
5886 * parse an Element declaration.
5887 *
5888 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5889 *
5890 * [ VC: Unique Element Type Declaration ]
5891 * No element type may be declared more than once
5892 *
5893 * Returns the type of the element, or -1 in case of error
5894 */
5895int
5896xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005897 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005898 int ret = -1;
5899 xmlElementContentPtr content = NULL;
5900
Daniel Veillard4c778d82005-01-23 17:37:44 +00005901 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005902 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005903 xmlParserInputPtr input = ctxt->input;
5904
5905 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005906 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005907 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5908 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005909 }
5910 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005911 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005912 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005913 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5914 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005915 return(-1);
5916 }
5917 while ((RAW == 0) && (ctxt->inputNr > 1))
5918 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5921 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005922 }
5923 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005924 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005925 SKIP(5);
5926 /*
5927 * Element must always be empty.
5928 */
5929 ret = XML_ELEMENT_TYPE_EMPTY;
5930 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5931 (NXT(2) == 'Y')) {
5932 SKIP(3);
5933 /*
5934 * Element is a generic container.
5935 */
5936 ret = XML_ELEMENT_TYPE_ANY;
5937 } else if (RAW == '(') {
5938 ret = xmlParseElementContentDecl(ctxt, name, &content);
5939 } else {
5940 /*
5941 * [ WFC: PEs in Internal Subset ] error handling.
5942 */
5943 if ((RAW == '%') && (ctxt->external == 0) &&
5944 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005945 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005946 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005947 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005948 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005949 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5950 }
Owen Taylor3473f882001-02-23 17:55:21 +00005951 return(-1);
5952 }
5953
5954 SKIP_BLANKS;
5955 /*
5956 * Pop-up of finished entities.
5957 */
5958 while ((RAW == 0) && (ctxt->inputNr > 1))
5959 xmlPopInput(ctxt);
5960 SKIP_BLANKS;
5961
5962 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005963 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005964 if (content != NULL) {
5965 xmlFreeDocElementContent(ctxt->myDoc, content);
5966 }
Owen Taylor3473f882001-02-23 17:55:21 +00005967 } else {
5968 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005969 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5970 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005971 }
5972
5973 NEXT;
5974 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005975 (ctxt->sax->elementDecl != NULL)) {
5976 if (content != NULL)
5977 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005978 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5979 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005980 if ((content != NULL) && (content->parent == NULL)) {
5981 /*
5982 * this is a trick: if xmlAddElementDecl is called,
5983 * instead of copying the full tree it is plugged directly
5984 * if called from the parser. Avoid duplicating the
5985 * interfaces or change the API/ABI
5986 */
5987 xmlFreeDocElementContent(ctxt->myDoc, content);
5988 }
5989 } else if (content != NULL) {
5990 xmlFreeDocElementContent(ctxt->myDoc, content);
5991 }
Owen Taylor3473f882001-02-23 17:55:21 +00005992 }
Owen Taylor3473f882001-02-23 17:55:21 +00005993 }
5994 return(ret);
5995}
5996
5997/**
Owen Taylor3473f882001-02-23 17:55:21 +00005998 * xmlParseConditionalSections
5999 * @ctxt: an XML parser context
6000 *
6001 * [61] conditionalSect ::= includeSect | ignoreSect
6002 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6003 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6004 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6005 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6006 */
6007
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006008static void
Owen Taylor3473f882001-02-23 17:55:21 +00006009xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6010 SKIP(3);
6011 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006012 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006013 SKIP(7);
6014 SKIP_BLANKS;
6015 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006016 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006017 } else {
6018 NEXT;
6019 }
6020 if (xmlParserDebugEntities) {
6021 if ((ctxt->input != NULL) && (ctxt->input->filename))
6022 xmlGenericError(xmlGenericErrorContext,
6023 "%s(%d): ", ctxt->input->filename,
6024 ctxt->input->line);
6025 xmlGenericError(xmlGenericErrorContext,
6026 "Entering INCLUDE Conditional Section\n");
6027 }
6028
6029 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6030 (NXT(2) != '>'))) {
6031 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006032 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006033
6034 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6035 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006036 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006037 NEXT;
6038 } else if (RAW == '%') {
6039 xmlParsePEReference(ctxt);
6040 } else
6041 xmlParseMarkupDecl(ctxt);
6042
6043 /*
6044 * Pop-up of finished entities.
6045 */
6046 while ((RAW == 0) && (ctxt->inputNr > 1))
6047 xmlPopInput(ctxt);
6048
Daniel Veillardfdc91562002-07-01 21:52:03 +00006049 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006050 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006051 break;
6052 }
6053 }
6054 if (xmlParserDebugEntities) {
6055 if ((ctxt->input != NULL) && (ctxt->input->filename))
6056 xmlGenericError(xmlGenericErrorContext,
6057 "%s(%d): ", ctxt->input->filename,
6058 ctxt->input->line);
6059 xmlGenericError(xmlGenericErrorContext,
6060 "Leaving INCLUDE Conditional Section\n");
6061 }
6062
Daniel Veillarda07050d2003-10-19 14:46:32 +00006063 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006064 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006065 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006066 int depth = 0;
6067
6068 SKIP(6);
6069 SKIP_BLANKS;
6070 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006071 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006072 } else {
6073 NEXT;
6074 }
6075 if (xmlParserDebugEntities) {
6076 if ((ctxt->input != NULL) && (ctxt->input->filename))
6077 xmlGenericError(xmlGenericErrorContext,
6078 "%s(%d): ", ctxt->input->filename,
6079 ctxt->input->line);
6080 xmlGenericError(xmlGenericErrorContext,
6081 "Entering IGNORE Conditional Section\n");
6082 }
6083
6084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006085 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006086 * But disable SAX event generating DTD building in the meantime
6087 */
6088 state = ctxt->disableSAX;
6089 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006090 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006091 ctxt->instate = XML_PARSER_IGNORE;
6092
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006093 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006094 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6095 depth++;
6096 SKIP(3);
6097 continue;
6098 }
6099 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6100 if (--depth >= 0) SKIP(3);
6101 continue;
6102 }
6103 NEXT;
6104 continue;
6105 }
6106
6107 ctxt->disableSAX = state;
6108 ctxt->instate = instate;
6109
6110 if (xmlParserDebugEntities) {
6111 if ((ctxt->input != NULL) && (ctxt->input->filename))
6112 xmlGenericError(xmlGenericErrorContext,
6113 "%s(%d): ", ctxt->input->filename,
6114 ctxt->input->line);
6115 xmlGenericError(xmlGenericErrorContext,
6116 "Leaving IGNORE Conditional Section\n");
6117 }
6118
6119 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006120 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006121 }
6122
6123 if (RAW == 0)
6124 SHRINK;
6125
6126 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006127 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006128 } else {
6129 SKIP(3);
6130 }
6131}
6132
6133/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006134 * xmlParseMarkupDecl:
6135 * @ctxt: an XML parser context
6136 *
6137 * parse Markup declarations
6138 *
6139 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6140 * NotationDecl | PI | Comment
6141 *
6142 * [ VC: Proper Declaration/PE Nesting ]
6143 * Parameter-entity replacement text must be properly nested with
6144 * markup declarations. That is to say, if either the first character
6145 * or the last character of a markup declaration (markupdecl above) is
6146 * contained in the replacement text for a parameter-entity reference,
6147 * both must be contained in the same replacement text.
6148 *
6149 * [ WFC: PEs in Internal Subset ]
6150 * In the internal DTD subset, parameter-entity references can occur
6151 * only where markup declarations can occur, not within markup declarations.
6152 * (This does not apply to references that occur in external parameter
6153 * entities or to the external subset.)
6154 */
6155void
6156xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6157 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006158 if (CUR == '<') {
6159 if (NXT(1) == '!') {
6160 switch (NXT(2)) {
6161 case 'E':
6162 if (NXT(3) == 'L')
6163 xmlParseElementDecl(ctxt);
6164 else if (NXT(3) == 'N')
6165 xmlParseEntityDecl(ctxt);
6166 break;
6167 case 'A':
6168 xmlParseAttributeListDecl(ctxt);
6169 break;
6170 case 'N':
6171 xmlParseNotationDecl(ctxt);
6172 break;
6173 case '-':
6174 xmlParseComment(ctxt);
6175 break;
6176 default:
6177 /* there is an error but it will be detected later */
6178 break;
6179 }
6180 } else if (NXT(1) == '?') {
6181 xmlParsePI(ctxt);
6182 }
6183 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006184 /*
6185 * This is only for internal subset. On external entities,
6186 * the replacement is done before parsing stage
6187 */
6188 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6189 xmlParsePEReference(ctxt);
6190
6191 /*
6192 * Conditional sections are allowed from entities included
6193 * by PE References in the internal subset.
6194 */
6195 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6196 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6197 xmlParseConditionalSections(ctxt);
6198 }
6199 }
6200
6201 ctxt->instate = XML_PARSER_DTD;
6202}
6203
6204/**
6205 * xmlParseTextDecl:
6206 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006207 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006208 * parse an XML declaration header for external entities
6209 *
6210 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006211 */
6212
6213void
6214xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6215 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006216 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006217
6218 /*
6219 * We know that '<?xml' is here.
6220 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006221 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006222 SKIP(5);
6223 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006224 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006225 return;
6226 }
6227
William M. Brack76e95df2003-10-18 16:20:14 +00006228 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006229 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6230 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006231 }
6232 SKIP_BLANKS;
6233
6234 /*
6235 * We may have the VersionInfo here.
6236 */
6237 version = xmlParseVersionInfo(ctxt);
6238 if (version == NULL)
6239 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006240 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006241 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006242 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6243 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006244 }
6245 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006246 ctxt->input->version = version;
6247
6248 /*
6249 * We must have the encoding declaration
6250 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006251 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006252 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6253 /*
6254 * The XML REC instructs us to stop parsing right here
6255 */
6256 return;
6257 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006258 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6259 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6260 "Missing encoding in text declaration\n");
6261 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006262
6263 SKIP_BLANKS;
6264 if ((RAW == '?') && (NXT(1) == '>')) {
6265 SKIP(2);
6266 } else if (RAW == '>') {
6267 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006268 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006269 NEXT;
6270 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006271 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006272 MOVETO_ENDTAG(CUR_PTR);
6273 NEXT;
6274 }
6275}
6276
6277/**
Owen Taylor3473f882001-02-23 17:55:21 +00006278 * xmlParseExternalSubset:
6279 * @ctxt: an XML parser context
6280 * @ExternalID: the external identifier
6281 * @SystemID: the system identifier (or URL)
6282 *
6283 * parse Markup declarations from an external subset
6284 *
6285 * [30] extSubset ::= textDecl? extSubsetDecl
6286 *
6287 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6288 */
6289void
6290xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6291 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006292 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006293 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006294
6295 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6296 (ctxt->input->end - ctxt->input->cur >= 4)) {
6297 xmlChar start[4];
6298 xmlCharEncoding enc;
6299
6300 start[0] = RAW;
6301 start[1] = NXT(1);
6302 start[2] = NXT(2);
6303 start[3] = NXT(3);
6304 enc = xmlDetectCharEncoding(start, 4);
6305 if (enc != XML_CHAR_ENCODING_NONE)
6306 xmlSwitchEncoding(ctxt, enc);
6307 }
6308
Daniel Veillarda07050d2003-10-19 14:46:32 +00006309 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006310 xmlParseTextDecl(ctxt);
6311 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6312 /*
6313 * The XML REC instructs us to stop parsing right here
6314 */
6315 ctxt->instate = XML_PARSER_EOF;
6316 return;
6317 }
6318 }
6319 if (ctxt->myDoc == NULL) {
6320 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6321 }
6322 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6323 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6324
6325 ctxt->instate = XML_PARSER_DTD;
6326 ctxt->external = 1;
6327 while (((RAW == '<') && (NXT(1) == '?')) ||
6328 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006329 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006330 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006331 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006332
6333 GROW;
6334 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6335 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006336 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006337 NEXT;
6338 } else if (RAW == '%') {
6339 xmlParsePEReference(ctxt);
6340 } else
6341 xmlParseMarkupDecl(ctxt);
6342
6343 /*
6344 * Pop-up of finished entities.
6345 */
6346 while ((RAW == 0) && (ctxt->inputNr > 1))
6347 xmlPopInput(ctxt);
6348
Daniel Veillardfdc91562002-07-01 21:52:03 +00006349 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006350 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 break;
6352 }
6353 }
6354
6355 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006356 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006357 }
6358
6359}
6360
6361/**
6362 * xmlParseReference:
6363 * @ctxt: an XML parser context
6364 *
6365 * parse and handle entity references in content, depending on the SAX
6366 * interface, this may end-up in a call to character() if this is a
6367 * CharRef, a predefined entity, if there is no reference() callback.
6368 * or if the parser was asked to switch to that mode.
6369 *
6370 * [67] Reference ::= EntityRef | CharRef
6371 */
6372void
6373xmlParseReference(xmlParserCtxtPtr ctxt) {
6374 xmlEntityPtr ent;
6375 xmlChar *val;
6376 if (RAW != '&') return;
6377
6378 if (NXT(1) == '#') {
6379 int i = 0;
6380 xmlChar out[10];
6381 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006382 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006383
Daniel Veillarddc171602008-03-26 17:41:38 +00006384 if (value == 0)
6385 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006386 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6387 /*
6388 * So we are using non-UTF-8 buffers
6389 * Check that the char fit on 8bits, if not
6390 * generate a CharRef.
6391 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006392 if (value <= 0xFF) {
6393 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006394 out[1] = 0;
6395 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6396 (!ctxt->disableSAX))
6397 ctxt->sax->characters(ctxt->userData, out, 1);
6398 } else {
6399 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006400 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006401 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006402 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006403 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6404 (!ctxt->disableSAX))
6405 ctxt->sax->reference(ctxt->userData, out);
6406 }
6407 } else {
6408 /*
6409 * Just encode the value in UTF-8
6410 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006411 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006412 out[i] = 0;
6413 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6414 (!ctxt->disableSAX))
6415 ctxt->sax->characters(ctxt->userData, out, i);
6416 }
6417 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006418 int was_checked;
6419
Owen Taylor3473f882001-02-23 17:55:21 +00006420 ent = xmlParseEntityRef(ctxt);
6421 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006422 if (!ctxt->wellFormed)
6423 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006424 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006425 if ((ent->name != NULL) &&
6426 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6427 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006428 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006429
6430
6431 /*
6432 * The first reference to the entity trigger a parsing phase
6433 * where the ent->children is filled with the result from
6434 * the parsing.
6435 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006436 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006437 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006438
Owen Taylor3473f882001-02-23 17:55:21 +00006439 value = ent->content;
6440
6441 /*
6442 * Check that this entity is well formed
6443 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006444 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006445 (value[1] == 0) && (value[0] == '<') &&
6446 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6447 /*
6448 * DONE: get definite answer on this !!!
6449 * Lots of entity decls are used to declare a single
6450 * char
6451 * <!ENTITY lt "<">
6452 * Which seems to be valid since
6453 * 2.4: The ampersand character (&) and the left angle
6454 * bracket (<) may appear in their literal form only
6455 * when used ... They are also legal within the literal
6456 * entity value of an internal entity declaration;i
6457 * see "4.3.2 Well-Formed Parsed Entities".
6458 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6459 * Looking at the OASIS test suite and James Clark
6460 * tests, this is broken. However the XML REC uses
6461 * it. Is the XML REC not well-formed ????
6462 * This is a hack to avoid this problem
6463 *
6464 * ANSWER: since lt gt amp .. are already defined,
6465 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006466 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006467 * is lousy but acceptable.
6468 */
6469 list = xmlNewDocText(ctxt->myDoc, value);
6470 if (list != NULL) {
6471 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6472 (ent->children == NULL)) {
6473 ent->children = list;
6474 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006475 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006476 list->parent = (xmlNodePtr) ent;
6477 } else {
6478 xmlFreeNodeList(list);
6479 }
6480 } else if (list != NULL) {
6481 xmlFreeNodeList(list);
6482 }
6483 } else {
6484 /*
6485 * 4.3.2: An internal general parsed entity is well-formed
6486 * if its replacement text matches the production labeled
6487 * content.
6488 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006489
6490 void *user_data;
6491 /*
6492 * This is a bit hackish but this seems the best
6493 * way to make sure both SAX and DOM entity support
6494 * behaves okay.
6495 */
6496 if (ctxt->userData == ctxt)
6497 user_data = NULL;
6498 else
6499 user_data = ctxt->userData;
6500
Owen Taylor3473f882001-02-23 17:55:21 +00006501 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6502 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006503 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6504 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006505 ctxt->depth--;
6506 } else if (ent->etype ==
6507 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6508 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006509 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006510 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006511 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006512 ctxt->depth--;
6513 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006514 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006515 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6516 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006517 }
6518 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006519 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006520 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006521 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006522 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6523 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006524 (ent->children == NULL)) {
6525 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006526 if (ctxt->replaceEntities) {
6527 /*
6528 * Prune it directly in the generated document
6529 * except for single text nodes.
6530 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006531 if (((list->type == XML_TEXT_NODE) &&
6532 (list->next == NULL)) ||
6533 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006534 list->parent = (xmlNodePtr) ent;
6535 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006536 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006537 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006538 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006539 while (list != NULL) {
6540 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006541 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006542 if (list->next == NULL)
6543 ent->last = list;
6544 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006545 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006546 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006547#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006548 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6549 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006550#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006551 }
6552 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006553 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006554 while (list != NULL) {
6555 list->parent = (xmlNodePtr) ent;
6556 if (list->next == NULL)
6557 ent->last = list;
6558 list = list->next;
6559 }
Owen Taylor3473f882001-02-23 17:55:21 +00006560 }
6561 } else {
6562 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006563 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006564 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006565 } else if ((ret != XML_ERR_OK) &&
6566 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006567 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6568 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006569 } else if (list != NULL) {
6570 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006571 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006572 }
6573 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006574 ent->checked = 1;
6575 }
6576
6577 if (ent->children == NULL) {
6578 /*
6579 * Probably running in SAX mode and the callbacks don't
6580 * build the entity content. So unless we already went
6581 * though parsing for first checking go though the entity
6582 * content to generate callbacks associated to the entity
6583 */
6584 if (was_checked == 1) {
6585 void *user_data;
6586 /*
6587 * This is a bit hackish but this seems the best
6588 * way to make sure both SAX and DOM entity support
6589 * behaves okay.
6590 */
6591 if (ctxt->userData == ctxt)
6592 user_data = NULL;
6593 else
6594 user_data = ctxt->userData;
6595
6596 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6597 ctxt->depth++;
6598 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6599 ent->content, user_data, NULL);
6600 ctxt->depth--;
6601 } else if (ent->etype ==
6602 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6603 ctxt->depth++;
6604 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6605 ctxt->sax, user_data, ctxt->depth,
6606 ent->URI, ent->ExternalID, NULL);
6607 ctxt->depth--;
6608 } else {
6609 ret = XML_ERR_ENTITY_PE_INTERNAL;
6610 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6611 "invalid entity type found\n", NULL);
6612 }
6613 if (ret == XML_ERR_ENTITY_LOOP) {
6614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6615 return;
6616 }
6617 }
6618 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6619 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6620 /*
6621 * Entity reference callback comes second, it's somewhat
6622 * superfluous but a compatibility to historical behaviour
6623 */
6624 ctxt->sax->reference(ctxt->userData, ent->name);
6625 }
6626 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006627 }
6628 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006629 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006630 /*
6631 * Create a node.
6632 */
6633 ctxt->sax->reference(ctxt->userData, ent->name);
6634 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006635 }
6636 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006637 /*
6638 * There is a problem on the handling of _private for entities
6639 * (bug 155816): Should we copy the content of the field from
6640 * the entity (possibly overwriting some value set by the user
6641 * when a copy is created), should we leave it alone, or should
6642 * we try to take care of different situations? The problem
6643 * is exacerbated by the usage of this field by the xmlReader.
6644 * To fix this bug, we look at _private on the created node
6645 * and, if it's NULL, we copy in whatever was in the entity.
6646 * If it's not NULL we leave it alone. This is somewhat of a
6647 * hack - maybe we should have further tests to determine
6648 * what to do.
6649 */
Owen Taylor3473f882001-02-23 17:55:21 +00006650 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6651 /*
6652 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006653 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006654 * In the first occurrence list contains the replacement.
6655 * progressive == 2 means we are operating on the Reader
6656 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006657 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006658 if (((list == NULL) && (ent->owner == 0)) ||
6659 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006660 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006661
6662 /*
6663 * when operating on a reader, the entities definitions
6664 * are always owning the entities subtree.
6665 if (ctxt->parseMode == XML_PARSE_READER)
6666 ent->owner = 1;
6667 */
6668
Daniel Veillard62f313b2001-07-04 19:49:14 +00006669 cur = ent->children;
6670 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006671 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006672 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006673 if (nw->_private == NULL)
6674 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006675 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006676 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006677 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006678 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006679 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006680 if (cur == ent->last) {
6681 /*
6682 * needed to detect some strange empty
6683 * node cases in the reader tests
6684 */
6685 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006686 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006687 (nw->type == XML_ELEMENT_NODE) &&
6688 (nw->children == NULL))
6689 nw->extra = 1;
6690
Daniel Veillard62f313b2001-07-04 19:49:14 +00006691 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006692 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006693 cur = cur->next;
6694 }
Daniel Veillard81273902003-09-30 00:43:48 +00006695#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006696 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006697 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006698#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006699 } else if (list == NULL) {
6700 xmlNodePtr nw = NULL, cur, next, last,
6701 firstChild = NULL;
6702 /*
6703 * Copy the entity child list and make it the new
6704 * entity child list. The goal is to make sure any
6705 * ID or REF referenced will be the one from the
6706 * document content and not the entity copy.
6707 */
6708 cur = ent->children;
6709 ent->children = NULL;
6710 last = ent->last;
6711 ent->last = NULL;
6712 while (cur != NULL) {
6713 next = cur->next;
6714 cur->next = NULL;
6715 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006716 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006717 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006718 if (nw->_private == NULL)
6719 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006720 if (firstChild == NULL){
6721 firstChild = cur;
6722 }
6723 xmlAddChild((xmlNodePtr) ent, nw);
6724 xmlAddChild(ctxt->node, cur);
6725 }
6726 if (cur == last)
6727 break;
6728 cur = next;
6729 }
6730 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006731#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006732 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6733 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006734#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006735 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006736 const xmlChar *nbktext;
6737
Daniel Veillard62f313b2001-07-04 19:49:14 +00006738 /*
6739 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006740 * node with a possible previous text one which
6741 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006742 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006743 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6744 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006745 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006746 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006747 if ((ent->last != ent->children) &&
6748 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006749 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006750 xmlAddChildList(ctxt->node, ent->children);
6751 }
6752
Owen Taylor3473f882001-02-23 17:55:21 +00006753 /*
6754 * This is to avoid a nasty side effect, see
6755 * characters() in SAX.c
6756 */
6757 ctxt->nodemem = 0;
6758 ctxt->nodelen = 0;
6759 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006760 }
6761 }
6762 } else {
6763 val = ent->content;
6764 if (val == NULL) return;
6765 /*
6766 * inline the entity.
6767 */
6768 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6769 (!ctxt->disableSAX))
6770 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6771 }
6772 }
6773}
6774
6775/**
6776 * xmlParseEntityRef:
6777 * @ctxt: an XML parser context
6778 *
6779 * parse ENTITY references declarations
6780 *
6781 * [68] EntityRef ::= '&' Name ';'
6782 *
6783 * [ WFC: Entity Declared ]
6784 * In a document without any DTD, a document with only an internal DTD
6785 * subset which contains no parameter entity references, or a document
6786 * with "standalone='yes'", the Name given in the entity reference
6787 * must match that in an entity declaration, except that well-formed
6788 * documents need not declare any of the following entities: amp, lt,
6789 * gt, apos, quot. The declaration of a parameter entity must precede
6790 * any reference to it. Similarly, the declaration of a general entity
6791 * must precede any reference to it which appears in a default value in an
6792 * attribute-list declaration. Note that if entities are declared in the
6793 * external subset or in external parameter entities, a non-validating
6794 * processor is not obligated to read and process their declarations;
6795 * for such documents, the rule that an entity must be declared is a
6796 * well-formedness constraint only if standalone='yes'.
6797 *
6798 * [ WFC: Parsed Entity ]
6799 * An entity reference must not contain the name of an unparsed entity
6800 *
6801 * Returns the xmlEntityPtr if found, or NULL otherwise.
6802 */
6803xmlEntityPtr
6804xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006805 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006806 xmlEntityPtr ent = NULL;
6807
6808 GROW;
6809
6810 if (RAW == '&') {
6811 NEXT;
6812 name = xmlParseName(ctxt);
6813 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006814 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6815 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006816 } else {
6817 if (RAW == ';') {
6818 NEXT;
6819 /*
6820 * Ask first SAX for entity resolution, otherwise try the
6821 * predefined set.
6822 */
6823 if (ctxt->sax != NULL) {
6824 if (ctxt->sax->getEntity != NULL)
6825 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006826 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006827 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006828 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6829 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006830 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006831 }
Owen Taylor3473f882001-02-23 17:55:21 +00006832 }
6833 /*
6834 * [ WFC: Entity Declared ]
6835 * In a document without any DTD, a document with only an
6836 * internal DTD subset which contains no parameter entity
6837 * references, or a document with "standalone='yes'", the
6838 * Name given in the entity reference must match that in an
6839 * entity declaration, except that well-formed documents
6840 * need not declare any of the following entities: amp, lt,
6841 * gt, apos, quot.
6842 * The declaration of a parameter entity must precede any
6843 * reference to it.
6844 * Similarly, the declaration of a general entity must
6845 * precede any reference to it which appears in a default
6846 * value in an attribute-list declaration. Note that if
6847 * entities are declared in the external subset or in
6848 * external parameter entities, a non-validating processor
6849 * is not obligated to read and process their declarations;
6850 * for such documents, the rule that an entity must be
6851 * declared is a well-formedness constraint only if
6852 * standalone='yes'.
6853 */
6854 if (ent == NULL) {
6855 if ((ctxt->standalone == 1) ||
6856 ((ctxt->hasExternalSubset == 0) &&
6857 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006858 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006859 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006860 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006861 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006862 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006863 if ((ctxt->inSubset == 0) &&
6864 (ctxt->sax != NULL) &&
6865 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006866 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006867 }
Owen Taylor3473f882001-02-23 17:55:21 +00006868 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006869 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 }
6871
6872 /*
6873 * [ WFC: Parsed Entity ]
6874 * An entity reference must not contain the name of an
6875 * unparsed entity
6876 */
6877 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006879 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006880 }
6881
6882 /*
6883 * [ WFC: No External Entity References ]
6884 * Attribute values cannot contain direct or indirect
6885 * entity references to external entities.
6886 */
6887 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6888 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006889 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6890 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006891 }
6892 /*
6893 * [ WFC: No < in Attribute Values ]
6894 * The replacement text of any entity referred to directly or
6895 * indirectly in an attribute value (other than "&lt;") must
6896 * not contain a <.
6897 */
6898 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6899 (ent != NULL) &&
6900 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6901 (ent->content != NULL) &&
6902 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006903 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006904 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006905 }
6906
6907 /*
6908 * Internal check, no parameter entities here ...
6909 */
6910 else {
6911 switch (ent->etype) {
6912 case XML_INTERNAL_PARAMETER_ENTITY:
6913 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006914 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6915 "Attempt to reference the parameter entity '%s'\n",
6916 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006917 break;
6918 default:
6919 break;
6920 }
6921 }
6922
6923 /*
6924 * [ WFC: No Recursion ]
6925 * A parsed entity must not contain a recursive reference
6926 * to itself, either directly or indirectly.
6927 * Done somewhere else
6928 */
6929
6930 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006931 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006932 }
Owen Taylor3473f882001-02-23 17:55:21 +00006933 }
6934 }
6935 return(ent);
6936}
6937
6938/**
6939 * xmlParseStringEntityRef:
6940 * @ctxt: an XML parser context
6941 * @str: a pointer to an index in the string
6942 *
6943 * parse ENTITY references declarations, but this version parses it from
6944 * a string value.
6945 *
6946 * [68] EntityRef ::= '&' Name ';'
6947 *
6948 * [ WFC: Entity Declared ]
6949 * In a document without any DTD, a document with only an internal DTD
6950 * subset which contains no parameter entity references, or a document
6951 * with "standalone='yes'", the Name given in the entity reference
6952 * must match that in an entity declaration, except that well-formed
6953 * documents need not declare any of the following entities: amp, lt,
6954 * gt, apos, quot. The declaration of a parameter entity must precede
6955 * any reference to it. Similarly, the declaration of a general entity
6956 * must precede any reference to it which appears in a default value in an
6957 * attribute-list declaration. Note that if entities are declared in the
6958 * external subset or in external parameter entities, a non-validating
6959 * processor is not obligated to read and process their declarations;
6960 * for such documents, the rule that an entity must be declared is a
6961 * well-formedness constraint only if standalone='yes'.
6962 *
6963 * [ WFC: Parsed Entity ]
6964 * An entity reference must not contain the name of an unparsed entity
6965 *
6966 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6967 * is updated to the current location in the string.
6968 */
6969xmlEntityPtr
6970xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6971 xmlChar *name;
6972 const xmlChar *ptr;
6973 xmlChar cur;
6974 xmlEntityPtr ent = NULL;
6975
6976 if ((str == NULL) || (*str == NULL))
6977 return(NULL);
6978 ptr = *str;
6979 cur = *ptr;
6980 if (cur == '&') {
6981 ptr++;
6982 cur = *ptr;
6983 name = xmlParseStringName(ctxt, &ptr);
6984 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006985 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6986 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006987 } else {
6988 if (*ptr == ';') {
6989 ptr++;
6990 /*
6991 * Ask first SAX for entity resolution, otherwise try the
6992 * predefined set.
6993 */
6994 if (ctxt->sax != NULL) {
6995 if (ctxt->sax->getEntity != NULL)
6996 ent = ctxt->sax->getEntity(ctxt->userData, name);
6997 if (ent == NULL)
6998 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006999 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00007000 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007001 }
Owen Taylor3473f882001-02-23 17:55:21 +00007002 }
7003 /*
7004 * [ WFC: Entity Declared ]
7005 * In a document without any DTD, a document with only an
7006 * internal DTD subset which contains no parameter entity
7007 * references, or a document with "standalone='yes'", the
7008 * Name given in the entity reference must match that in an
7009 * entity declaration, except that well-formed documents
7010 * need not declare any of the following entities: amp, lt,
7011 * gt, apos, quot.
7012 * The declaration of a parameter entity must precede any
7013 * reference to it.
7014 * Similarly, the declaration of a general entity must
7015 * precede any reference to it which appears in a default
7016 * value in an attribute-list declaration. Note that if
7017 * entities are declared in the external subset or in
7018 * external parameter entities, a non-validating processor
7019 * is not obligated to read and process their declarations;
7020 * for such documents, the rule that an entity must be
7021 * declared is a well-formedness constraint only if
7022 * standalone='yes'.
7023 */
7024 if (ent == NULL) {
7025 if ((ctxt->standalone == 1) ||
7026 ((ctxt->hasExternalSubset == 0) &&
7027 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007028 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007029 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007030 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007031 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00007032 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007033 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007034 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00007035 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00007036 }
7037
7038 /*
7039 * [ WFC: Parsed Entity ]
7040 * An entity reference must not contain the name of an
7041 * unparsed entity
7042 */
7043 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007044 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007045 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007046 }
7047
7048 /*
7049 * [ WFC: No External Entity References ]
7050 * Attribute values cannot contain direct or indirect
7051 * entity references to external entities.
7052 */
7053 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7054 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007055 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00007056 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007057 }
7058 /*
7059 * [ WFC: No < in Attribute Values ]
7060 * The replacement text of any entity referred to directly or
7061 * indirectly in an attribute value (other than "&lt;") must
7062 * not contain a <.
7063 */
7064 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7065 (ent != NULL) &&
7066 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7067 (ent->content != NULL) &&
7068 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007069 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7070 "'<' in entity '%s' is not allowed in attributes values\n",
7071 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007072 }
7073
7074 /*
7075 * Internal check, no parameter entities here ...
7076 */
7077 else {
7078 switch (ent->etype) {
7079 case XML_INTERNAL_PARAMETER_ENTITY:
7080 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00007081 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7082 "Attempt to reference the parameter entity '%s'\n",
7083 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007084 break;
7085 default:
7086 break;
7087 }
7088 }
7089
7090 /*
7091 * [ WFC: No Recursion ]
7092 * A parsed entity must not contain a recursive reference
7093 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007094 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00007095 */
7096
7097 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007098 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007099 }
7100 xmlFree(name);
7101 }
7102 }
7103 *str = ptr;
7104 return(ent);
7105}
7106
7107/**
7108 * xmlParsePEReference:
7109 * @ctxt: an XML parser context
7110 *
7111 * parse PEReference declarations
7112 * The entity content is handled directly by pushing it's content as
7113 * a new input stream.
7114 *
7115 * [69] PEReference ::= '%' Name ';'
7116 *
7117 * [ WFC: No Recursion ]
7118 * A parsed entity must not contain a recursive
7119 * reference to itself, either directly or indirectly.
7120 *
7121 * [ WFC: Entity Declared ]
7122 * In a document without any DTD, a document with only an internal DTD
7123 * subset which contains no parameter entity references, or a document
7124 * with "standalone='yes'", ... ... The declaration of a parameter
7125 * entity must precede any reference to it...
7126 *
7127 * [ VC: Entity Declared ]
7128 * In a document with an external subset or external parameter entities
7129 * with "standalone='no'", ... ... The declaration of a parameter entity
7130 * must precede any reference to it...
7131 *
7132 * [ WFC: In DTD ]
7133 * Parameter-entity references may only appear in the DTD.
7134 * NOTE: misleading but this is handled.
7135 */
7136void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007137xmlParsePEReference(xmlParserCtxtPtr ctxt)
7138{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007139 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007140 xmlEntityPtr entity = NULL;
7141 xmlParserInputPtr input;
7142
7143 if (RAW == '%') {
7144 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00007145 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00007146 if (name == NULL) {
7147 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7148 "xmlParsePEReference: no name\n");
7149 } else {
7150 if (RAW == ';') {
7151 NEXT;
7152 if ((ctxt->sax != NULL) &&
7153 (ctxt->sax->getParameterEntity != NULL))
7154 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7155 name);
7156 if (entity == NULL) {
7157 /*
7158 * [ WFC: Entity Declared ]
7159 * In a document without any DTD, a document with only an
7160 * internal DTD subset which contains no parameter entity
7161 * references, or a document with "standalone='yes'", ...
7162 * ... The declaration of a parameter entity must precede
7163 * any reference to it...
7164 */
7165 if ((ctxt->standalone == 1) ||
7166 ((ctxt->hasExternalSubset == 0) &&
7167 (ctxt->hasPErefs == 0))) {
7168 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7169 "PEReference: %%%s; not found\n",
7170 name);
7171 } else {
7172 /*
7173 * [ VC: Entity Declared ]
7174 * In a document with an external subset or external
7175 * parameter entities with "standalone='no'", ...
7176 * ... The declaration of a parameter entity must
7177 * precede any reference to it...
7178 */
7179 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7180 "PEReference: %%%s; not found\n",
7181 name, NULL);
7182 ctxt->valid = 0;
7183 }
7184 } else {
7185 /*
7186 * Internal checking in case the entity quest barfed
7187 */
7188 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7189 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7190 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7191 "Internal: %%%s; is not a parameter entity\n",
7192 name, NULL);
7193 } else if (ctxt->input->free != deallocblankswrapper) {
7194 input =
7195 xmlNewBlanksWrapperInputStream(ctxt, entity);
7196 xmlPushInput(ctxt, input);
7197 } else {
7198 /*
7199 * TODO !!!
7200 * handle the extra spaces added before and after
7201 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7202 */
7203 input = xmlNewEntityInputStream(ctxt, entity);
7204 xmlPushInput(ctxt, input);
7205 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00007206 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00007207 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00007208 xmlParseTextDecl(ctxt);
7209 if (ctxt->errNo ==
7210 XML_ERR_UNSUPPORTED_ENCODING) {
7211 /*
7212 * The XML REC instructs us to stop parsing
7213 * right here
7214 */
7215 ctxt->instate = XML_PARSER_EOF;
7216 return;
7217 }
7218 }
7219 }
7220 }
7221 ctxt->hasPErefs = 1;
7222 } else {
7223 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7224 }
7225 }
Owen Taylor3473f882001-02-23 17:55:21 +00007226 }
7227}
7228
7229/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007230 * xmlLoadEntityContent:
7231 * @ctxt: an XML parser context
7232 * @entity: an unloaded system entity
7233 *
7234 * Load the original content of the given system entity from the
7235 * ExternalID/SystemID given. This is to be used for Included in Literal
7236 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7237 *
7238 * Returns 0 in case of success and -1 in case of failure
7239 */
7240static int
7241xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7242 xmlParserInputPtr input;
7243 xmlBufferPtr buf;
7244 int l, c;
7245 int count = 0;
7246
7247 if ((ctxt == NULL) || (entity == NULL) ||
7248 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7249 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7250 (entity->content != NULL)) {
7251 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7252 "xmlLoadEntityContent parameter error");
7253 return(-1);
7254 }
7255
7256 if (xmlParserDebugEntities)
7257 xmlGenericError(xmlGenericErrorContext,
7258 "Reading %s entity content input\n", entity->name);
7259
7260 buf = xmlBufferCreate();
7261 if (buf == NULL) {
7262 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7263 "xmlLoadEntityContent parameter error");
7264 return(-1);
7265 }
7266
7267 input = xmlNewEntityInputStream(ctxt, entity);
7268 if (input == NULL) {
7269 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7270 "xmlLoadEntityContent input error");
7271 xmlBufferFree(buf);
7272 return(-1);
7273 }
7274
7275 /*
7276 * Push the entity as the current input, read char by char
7277 * saving to the buffer until the end of the entity or an error
7278 */
7279 xmlPushInput(ctxt, input);
7280 GROW;
7281 c = CUR_CHAR(l);
7282 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7283 (IS_CHAR(c))) {
7284 xmlBufferAdd(buf, ctxt->input->cur, l);
7285 if (count++ > 100) {
7286 count = 0;
7287 GROW;
7288 }
7289 NEXTL(l);
7290 c = CUR_CHAR(l);
7291 }
7292
7293 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7294 xmlPopInput(ctxt);
7295 } else if (!IS_CHAR(c)) {
7296 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7297 "xmlLoadEntityContent: invalid char value %d\n",
7298 c);
7299 xmlBufferFree(buf);
7300 return(-1);
7301 }
7302 entity->content = buf->content;
7303 buf->content = NULL;
7304 xmlBufferFree(buf);
7305
7306 return(0);
7307}
7308
7309/**
Owen Taylor3473f882001-02-23 17:55:21 +00007310 * xmlParseStringPEReference:
7311 * @ctxt: an XML parser context
7312 * @str: a pointer to an index in the string
7313 *
7314 * parse PEReference declarations
7315 *
7316 * [69] PEReference ::= '%' Name ';'
7317 *
7318 * [ WFC: No Recursion ]
7319 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007320 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007321 *
7322 * [ WFC: Entity Declared ]
7323 * In a document without any DTD, a document with only an internal DTD
7324 * subset which contains no parameter entity references, or a document
7325 * with "standalone='yes'", ... ... The declaration of a parameter
7326 * entity must precede any reference to it...
7327 *
7328 * [ VC: Entity Declared ]
7329 * In a document with an external subset or external parameter entities
7330 * with "standalone='no'", ... ... The declaration of a parameter entity
7331 * must precede any reference to it...
7332 *
7333 * [ WFC: In DTD ]
7334 * Parameter-entity references may only appear in the DTD.
7335 * NOTE: misleading but this is handled.
7336 *
7337 * Returns the string of the entity content.
7338 * str is updated to the current value of the index
7339 */
7340xmlEntityPtr
7341xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7342 const xmlChar *ptr;
7343 xmlChar cur;
7344 xmlChar *name;
7345 xmlEntityPtr entity = NULL;
7346
7347 if ((str == NULL) || (*str == NULL)) return(NULL);
7348 ptr = *str;
7349 cur = *ptr;
7350 if (cur == '%') {
7351 ptr++;
7352 cur = *ptr;
7353 name = xmlParseStringName(ctxt, &ptr);
7354 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007355 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7356 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007357 } else {
7358 cur = *ptr;
7359 if (cur == ';') {
7360 ptr++;
7361 cur = *ptr;
7362 if ((ctxt->sax != NULL) &&
7363 (ctxt->sax->getParameterEntity != NULL))
7364 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7365 name);
7366 if (entity == NULL) {
7367 /*
7368 * [ WFC: Entity Declared ]
7369 * In a document without any DTD, a document with only an
7370 * internal DTD subset which contains no parameter entity
7371 * references, or a document with "standalone='yes'", ...
7372 * ... The declaration of a parameter entity must precede
7373 * any reference to it...
7374 */
7375 if ((ctxt->standalone == 1) ||
7376 ((ctxt->hasExternalSubset == 0) &&
7377 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007378 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007379 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007380 } else {
7381 /*
7382 * [ VC: Entity Declared ]
7383 * In a document with an external subset or external
7384 * parameter entities with "standalone='no'", ...
7385 * ... The declaration of a parameter entity must
7386 * precede any reference to it...
7387 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007388 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7389 "PEReference: %%%s; not found\n",
7390 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007391 ctxt->valid = 0;
7392 }
7393 } else {
7394 /*
7395 * Internal checking in case the entity quest barfed
7396 */
7397 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7398 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007399 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7400 "%%%s; is not a parameter entity\n",
7401 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007402 }
7403 }
7404 ctxt->hasPErefs = 1;
7405 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007406 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007407 }
7408 xmlFree(name);
7409 }
7410 }
7411 *str = ptr;
7412 return(entity);
7413}
7414
7415/**
7416 * xmlParseDocTypeDecl:
7417 * @ctxt: an XML parser context
7418 *
7419 * parse a DOCTYPE declaration
7420 *
7421 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7422 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7423 *
7424 * [ VC: Root Element Type ]
7425 * The Name in the document type declaration must match the element
7426 * type of the root element.
7427 */
7428
7429void
7430xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007431 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007432 xmlChar *ExternalID = NULL;
7433 xmlChar *URI = NULL;
7434
7435 /*
7436 * We know that '<!DOCTYPE' has been detected.
7437 */
7438 SKIP(9);
7439
7440 SKIP_BLANKS;
7441
7442 /*
7443 * Parse the DOCTYPE name.
7444 */
7445 name = xmlParseName(ctxt);
7446 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007447 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7448 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007449 }
7450 ctxt->intSubName = name;
7451
7452 SKIP_BLANKS;
7453
7454 /*
7455 * Check for SystemID and ExternalID
7456 */
7457 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7458
7459 if ((URI != NULL) || (ExternalID != NULL)) {
7460 ctxt->hasExternalSubset = 1;
7461 }
7462 ctxt->extSubURI = URI;
7463 ctxt->extSubSystem = ExternalID;
7464
7465 SKIP_BLANKS;
7466
7467 /*
7468 * Create and update the internal subset.
7469 */
7470 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7471 (!ctxt->disableSAX))
7472 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7473
7474 /*
7475 * Is there any internal subset declarations ?
7476 * they are handled separately in xmlParseInternalSubset()
7477 */
7478 if (RAW == '[')
7479 return;
7480
7481 /*
7482 * We should be at the end of the DOCTYPE declaration.
7483 */
7484 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007485 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007486 }
7487 NEXT;
7488}
7489
7490/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007491 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007492 * @ctxt: an XML parser context
7493 *
7494 * parse the internal subset declaration
7495 *
7496 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7497 */
7498
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007499static void
Owen Taylor3473f882001-02-23 17:55:21 +00007500xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7501 /*
7502 * Is there any DTD definition ?
7503 */
7504 if (RAW == '[') {
7505 ctxt->instate = XML_PARSER_DTD;
7506 NEXT;
7507 /*
7508 * Parse the succession of Markup declarations and
7509 * PEReferences.
7510 * Subsequence (markupdecl | PEReference | S)*
7511 */
7512 while (RAW != ']') {
7513 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007514 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007515
7516 SKIP_BLANKS;
7517 xmlParseMarkupDecl(ctxt);
7518 xmlParsePEReference(ctxt);
7519
7520 /*
7521 * Pop-up of finished entities.
7522 */
7523 while ((RAW == 0) && (ctxt->inputNr > 1))
7524 xmlPopInput(ctxt);
7525
7526 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007527 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007528 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007529 break;
7530 }
7531 }
7532 if (RAW == ']') {
7533 NEXT;
7534 SKIP_BLANKS;
7535 }
7536 }
7537
7538 /*
7539 * We should be at the end of the DOCTYPE declaration.
7540 */
7541 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007542 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007543 }
7544 NEXT;
7545}
7546
Daniel Veillard81273902003-09-30 00:43:48 +00007547#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007548/**
7549 * xmlParseAttribute:
7550 * @ctxt: an XML parser context
7551 * @value: a xmlChar ** used to store the value of the attribute
7552 *
7553 * parse an attribute
7554 *
7555 * [41] Attribute ::= Name Eq AttValue
7556 *
7557 * [ WFC: No External Entity References ]
7558 * Attribute values cannot contain direct or indirect entity references
7559 * to external entities.
7560 *
7561 * [ WFC: No < in Attribute Values ]
7562 * The replacement text of any entity referred to directly or indirectly in
7563 * an attribute value (other than "&lt;") must not contain a <.
7564 *
7565 * [ VC: Attribute Value Type ]
7566 * The attribute must have been declared; the value must be of the type
7567 * declared for it.
7568 *
7569 * [25] Eq ::= S? '=' S?
7570 *
7571 * With namespace:
7572 *
7573 * [NS 11] Attribute ::= QName Eq AttValue
7574 *
7575 * Also the case QName == xmlns:??? is handled independently as a namespace
7576 * definition.
7577 *
7578 * Returns the attribute name, and the value in *value.
7579 */
7580
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007581const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007582xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007583 const xmlChar *name;
7584 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007585
7586 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007587 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007588 name = xmlParseName(ctxt);
7589 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007590 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007591 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007592 return(NULL);
7593 }
7594
7595 /*
7596 * read the value
7597 */
7598 SKIP_BLANKS;
7599 if (RAW == '=') {
7600 NEXT;
7601 SKIP_BLANKS;
7602 val = xmlParseAttValue(ctxt);
7603 ctxt->instate = XML_PARSER_CONTENT;
7604 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007605 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007606 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007607 return(NULL);
7608 }
7609
7610 /*
7611 * Check that xml:lang conforms to the specification
7612 * No more registered as an error, just generate a warning now
7613 * since this was deprecated in XML second edition
7614 */
7615 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7616 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007617 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7618 "Malformed value for xml:lang : %s\n",
7619 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007620 }
7621 }
7622
7623 /*
7624 * Check that xml:space conforms to the specification
7625 */
7626 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7627 if (xmlStrEqual(val, BAD_CAST "default"))
7628 *(ctxt->space) = 0;
7629 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7630 *(ctxt->space) = 1;
7631 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007632 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007633"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007634 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007635 }
7636 }
7637
7638 *value = val;
7639 return(name);
7640}
7641
7642/**
7643 * xmlParseStartTag:
7644 * @ctxt: an XML parser context
7645 *
7646 * parse a start of tag either for rule element or
7647 * EmptyElement. In both case we don't parse the tag closing chars.
7648 *
7649 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7650 *
7651 * [ WFC: Unique Att Spec ]
7652 * No attribute name may appear more than once in the same start-tag or
7653 * empty-element tag.
7654 *
7655 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7656 *
7657 * [ WFC: Unique Att Spec ]
7658 * No attribute name may appear more than once in the same start-tag or
7659 * empty-element tag.
7660 *
7661 * With namespace:
7662 *
7663 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7664 *
7665 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7666 *
7667 * Returns the element name parsed
7668 */
7669
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007670const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007671xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007672 const xmlChar *name;
7673 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007674 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007675 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007676 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007677 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007678 int i;
7679
7680 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007681 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007682
7683 name = xmlParseName(ctxt);
7684 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007685 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007686 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007687 return(NULL);
7688 }
7689
7690 /*
7691 * Now parse the attributes, it ends up with the ending
7692 *
7693 * (S Attribute)* S?
7694 */
7695 SKIP_BLANKS;
7696 GROW;
7697
Daniel Veillard21a0f912001-02-25 19:54:14 +00007698 while ((RAW != '>') &&
7699 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007700 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007701 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007702 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007703
7704 attname = xmlParseAttribute(ctxt, &attvalue);
7705 if ((attname != NULL) && (attvalue != NULL)) {
7706 /*
7707 * [ WFC: Unique Att Spec ]
7708 * No attribute name may appear more than once in the same
7709 * start-tag or empty-element tag.
7710 */
7711 for (i = 0; i < nbatts;i += 2) {
7712 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007713 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007714 xmlFree(attvalue);
7715 goto failed;
7716 }
7717 }
Owen Taylor3473f882001-02-23 17:55:21 +00007718 /*
7719 * Add the pair to atts
7720 */
7721 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007722 maxatts = 22; /* allow for 10 attrs by default */
7723 atts = (const xmlChar **)
7724 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007725 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007726 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007727 if (attvalue != NULL)
7728 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007729 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007730 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007731 ctxt->atts = atts;
7732 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007733 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007734 const xmlChar **n;
7735
Owen Taylor3473f882001-02-23 17:55:21 +00007736 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007737 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007738 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007739 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007740 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007741 if (attvalue != NULL)
7742 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007743 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007744 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007745 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007746 ctxt->atts = atts;
7747 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007748 }
7749 atts[nbatts++] = attname;
7750 atts[nbatts++] = attvalue;
7751 atts[nbatts] = NULL;
7752 atts[nbatts + 1] = NULL;
7753 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007754 if (attvalue != NULL)
7755 xmlFree(attvalue);
7756 }
7757
7758failed:
7759
Daniel Veillard3772de32002-12-17 10:31:45 +00007760 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007761 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7762 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007763 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007764 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7765 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007766 }
7767 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007768 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7769 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007770 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7771 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007772 break;
7773 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007774 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007775 GROW;
7776 }
7777
7778 /*
7779 * SAX: Start of Element !
7780 */
7781 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007782 (!ctxt->disableSAX)) {
7783 if (nbatts > 0)
7784 ctxt->sax->startElement(ctxt->userData, name, atts);
7785 else
7786 ctxt->sax->startElement(ctxt->userData, name, NULL);
7787 }
Owen Taylor3473f882001-02-23 17:55:21 +00007788
7789 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007790 /* Free only the content strings */
7791 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007792 if (atts[i] != NULL)
7793 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007794 }
7795 return(name);
7796}
7797
7798/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007799 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007800 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007801 * @line: line of the start tag
7802 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007803 *
7804 * parse an end of tag
7805 *
7806 * [42] ETag ::= '</' Name S? '>'
7807 *
7808 * With namespace
7809 *
7810 * [NS 9] ETag ::= '</' QName S? '>'
7811 */
7812
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007813static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007814xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007815 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007816
7817 GROW;
7818 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007819 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007820 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007821 return;
7822 }
7823 SKIP(2);
7824
Daniel Veillard46de64e2002-05-29 08:21:33 +00007825 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007826
7827 /*
7828 * We should definitely be at the ending "S? '>'" part
7829 */
7830 GROW;
7831 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007832 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007833 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007834 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007835 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007836
7837 /*
7838 * [ WFC: Element Type Match ]
7839 * The Name in an element's end-tag must match the element type in the
7840 * start-tag.
7841 *
7842 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007843 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007844 if (name == NULL) name = BAD_CAST "unparseable";
7845 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007846 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007847 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007848 }
7849
7850 /*
7851 * SAX: End of Tag
7852 */
7853 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7854 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007855 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007856
Daniel Veillarde57ec792003-09-10 10:50:59 +00007857 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007858 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007859 return;
7860}
7861
7862/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007863 * xmlParseEndTag:
7864 * @ctxt: an XML parser context
7865 *
7866 * parse an end of tag
7867 *
7868 * [42] ETag ::= '</' Name S? '>'
7869 *
7870 * With namespace
7871 *
7872 * [NS 9] ETag ::= '</' QName S? '>'
7873 */
7874
7875void
7876xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007877 xmlParseEndTag1(ctxt, 0);
7878}
Daniel Veillard81273902003-09-30 00:43:48 +00007879#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007880
/************************************************************************
 *									*
 *		SAX 2 specific operations				*
 *									*
 ************************************************************************/
7886
Daniel Veillard0fb18932003-09-07 09:14:37 +00007887/*
7888 * xmlGetNamespace:
7889 * @ctxt: an XML parser context
7890 * @prefix: the prefix to lookup
7891 *
7892 * Lookup the namespace name for the @prefix (which ca be NULL)
7893 * The prefix must come from the @ctxt->dict dictionnary
7894 *
7895 * Returns the namespace name or NULL if not bound
7896 */
7897static const xmlChar *
7898xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7899 int i;
7900
Daniel Veillarde57ec792003-09-10 10:50:59 +00007901 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007903 if (ctxt->nsTab[i] == prefix) {
7904 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7905 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007906 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007907 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908 return(NULL);
7909}
7910
7911/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007912 * xmlParseQName:
7913 * @ctxt: an XML parser context
7914 * @prefix: pointer to store the prefix part
7915 *
7916 * parse an XML Namespace QName
7917 *
7918 * [6] QName ::= (Prefix ':')? LocalPart
7919 * [7] Prefix ::= NCName
7920 * [8] LocalPart ::= NCName
7921 *
7922 * Returns the Name parsed or NULL
7923 */
7924
7925static const xmlChar *
7926xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7927 const xmlChar *l, *p;
7928
7929 GROW;
7930
7931 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007932 if (l == NULL) {
7933 if (CUR == ':') {
7934 l = xmlParseName(ctxt);
7935 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007936 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7937 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007938 *prefix = NULL;
7939 return(l);
7940 }
7941 }
7942 return(NULL);
7943 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007944 if (CUR == ':') {
7945 NEXT;
7946 p = l;
7947 l = xmlParseNCName(ctxt);
7948 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007949 xmlChar *tmp;
7950
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007951 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7952 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007953 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7954 p = xmlDictLookup(ctxt->dict, tmp, -1);
7955 if (tmp != NULL) xmlFree(tmp);
7956 *prefix = NULL;
7957 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007958 }
7959 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007960 xmlChar *tmp;
7961
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007962 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7963 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007964 NEXT;
7965 tmp = (xmlChar *) xmlParseName(ctxt);
7966 if (tmp != NULL) {
7967 tmp = xmlBuildQName(tmp, l, NULL, 0);
7968 l = xmlDictLookup(ctxt->dict, tmp, -1);
7969 if (tmp != NULL) xmlFree(tmp);
7970 *prefix = p;
7971 return(l);
7972 }
7973 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7974 l = xmlDictLookup(ctxt->dict, tmp, -1);
7975 if (tmp != NULL) xmlFree(tmp);
7976 *prefix = p;
7977 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007978 }
7979 *prefix = p;
7980 } else
7981 *prefix = NULL;
7982 return(l);
7983}
7984
7985/**
7986 * xmlParseQNameAndCompare:
7987 * @ctxt: an XML parser context
7988 * @name: the localname
7989 * @prefix: the prefix, if any.
7990 *
7991 * parse an XML name and compares for match
7992 * (specialized for endtag parsing)
7993 *
7994 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7995 * and the name for mismatch
7996 */
7997
7998static const xmlChar *
7999xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8000 xmlChar const *prefix) {
8001 const xmlChar *cmp = name;
8002 const xmlChar *in;
8003 const xmlChar *ret;
8004 const xmlChar *prefix2;
8005
8006 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8007
8008 GROW;
8009 in = ctxt->input->cur;
8010
8011 cmp = prefix;
8012 while (*in != 0 && *in == *cmp) {
8013 ++in;
8014 ++cmp;
8015 }
8016 if ((*cmp == 0) && (*in == ':')) {
8017 in++;
8018 cmp = name;
8019 while (*in != 0 && *in == *cmp) {
8020 ++in;
8021 ++cmp;
8022 }
William M. Brack76e95df2003-10-18 16:20:14 +00008023 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008024 /* success */
8025 ctxt->input->cur = in;
8026 return((const xmlChar*) 1);
8027 }
8028 }
8029 /*
8030 * all strings coms from the dictionary, equality can be done directly
8031 */
8032 ret = xmlParseQName (ctxt, &prefix2);
8033 if ((ret == name) && (prefix == prefix2))
8034 return((const xmlChar*) 1);
8035 return ret;
8036}
8037
8038/**
8039 * xmlParseAttValueInternal:
8040 * @ctxt: an XML parser context
8041 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008042 * @alloc: whether the attribute was reallocated as a new string
8043 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008044 *
8045 * parse a value for an attribute.
8046 * NOTE: if no normalization is needed, the routine will return pointers
8047 * directly from the data buffer.
8048 *
8049 * 3.3.3 Attribute-Value Normalization:
8050 * Before the value of an attribute is passed to the application or
8051 * checked for validity, the XML processor must normalize it as follows:
8052 * - a character reference is processed by appending the referenced
8053 * character to the attribute value
8054 * - an entity reference is processed by recursively processing the
8055 * replacement text of the entity
8056 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8057 * appending #x20 to the normalized value, except that only a single
8058 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8059 * parsed entity or the literal entity value of an internal parsed entity
8060 * - other characters are processed by appending them to the normalized value
8061 * If the declared value is not CDATA, then the XML processor must further
8062 * process the normalized attribute value by discarding any leading and
8063 * trailing space (#x20) characters, and by replacing sequences of space
8064 * (#x20) characters by a single space (#x20) character.
8065 * All attributes for which no declaration has been read should be treated
8066 * by a non-validating parser as if declared CDATA.
8067 *
8068 * Returns the AttValue parsed or NULL. The value has to be freed by the
8069 * caller if it was copied, this can be detected by val[*len] == 0.
8070 */
8071
8072static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008073xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8074 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008075{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008076 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008077 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078 xmlChar *ret = NULL;
8079
8080 GROW;
8081 in = (xmlChar *) CUR_PTR;
8082 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008083 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008084 return (NULL);
8085 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008086 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008087
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008088 /*
8089 * try to handle in this routine the most common case where no
8090 * allocation of a new string is required and where content is
8091 * pure ASCII.
8092 */
8093 limit = *in++;
8094 end = ctxt->input->end;
8095 start = in;
8096 if (in >= end) {
8097 const xmlChar *oldbase = ctxt->input->base;
8098 GROW;
8099 if (oldbase != ctxt->input->base) {
8100 long delta = ctxt->input->base - oldbase;
8101 start = start + delta;
8102 in = in + delta;
8103 }
8104 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008105 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008106 if (normalize) {
8107 /*
8108 * Skip any leading spaces
8109 */
8110 while ((in < end) && (*in != limit) &&
8111 ((*in == 0x20) || (*in == 0x9) ||
8112 (*in == 0xA) || (*in == 0xD))) {
8113 in++;
8114 start = in;
8115 if (in >= end) {
8116 const xmlChar *oldbase = ctxt->input->base;
8117 GROW;
8118 if (oldbase != ctxt->input->base) {
8119 long delta = ctxt->input->base - oldbase;
8120 start = start + delta;
8121 in = in + delta;
8122 }
8123 end = ctxt->input->end;
8124 }
8125 }
8126 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8127 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8128 if ((*in++ == 0x20) && (*in == 0x20)) break;
8129 if (in >= end) {
8130 const xmlChar *oldbase = ctxt->input->base;
8131 GROW;
8132 if (oldbase != ctxt->input->base) {
8133 long delta = ctxt->input->base - oldbase;
8134 start = start + delta;
8135 in = in + delta;
8136 }
8137 end = ctxt->input->end;
8138 }
8139 }
8140 last = in;
8141 /*
8142 * skip the trailing blanks
8143 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008144 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008145 while ((in < end) && (*in != limit) &&
8146 ((*in == 0x20) || (*in == 0x9) ||
8147 (*in == 0xA) || (*in == 0xD))) {
8148 in++;
8149 if (in >= end) {
8150 const xmlChar *oldbase = ctxt->input->base;
8151 GROW;
8152 if (oldbase != ctxt->input->base) {
8153 long delta = ctxt->input->base - oldbase;
8154 start = start + delta;
8155 in = in + delta;
8156 last = last + delta;
8157 }
8158 end = ctxt->input->end;
8159 }
8160 }
8161 if (*in != limit) goto need_complex;
8162 } else {
8163 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8164 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8165 in++;
8166 if (in >= end) {
8167 const xmlChar *oldbase = ctxt->input->base;
8168 GROW;
8169 if (oldbase != ctxt->input->base) {
8170 long delta = ctxt->input->base - oldbase;
8171 start = start + delta;
8172 in = in + delta;
8173 }
8174 end = ctxt->input->end;
8175 }
8176 }
8177 last = in;
8178 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008180 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008182 *len = last - start;
8183 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008184 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008185 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008186 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187 }
8188 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008189 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008190 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008191need_complex:
8192 if (alloc) *alloc = 1;
8193 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008194}
8195
8196/**
8197 * xmlParseAttribute2:
8198 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008199 * @pref: the element prefix
8200 * @elem: the element name
8201 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008202 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008203 * @len: an int * to save the length of the attribute
8204 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 *
8206 * parse an attribute in the new SAX2 framework.
8207 *
8208 * Returns the attribute name, and the value in *value, .
8209 */
8210
8211static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008212xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008213 const xmlChar * pref, const xmlChar * elem,
8214 const xmlChar ** prefix, xmlChar ** value,
8215 int *len, int *alloc)
8216{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008217 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008218 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008219 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008220
8221 *value = NULL;
8222 GROW;
8223 name = xmlParseQName(ctxt, prefix);
8224 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008225 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8226 "error parsing attribute name\n");
8227 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008228 }
8229
8230 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008231 * get the type if needed
8232 */
8233 if (ctxt->attsSpecial != NULL) {
8234 int type;
8235
8236 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008237 pref, elem, *prefix, name);
8238 if (type != 0)
8239 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008240 }
8241
8242 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008243 * read the value
8244 */
8245 SKIP_BLANKS;
8246 if (RAW == '=') {
8247 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008248 SKIP_BLANKS;
8249 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8250 if (normalize) {
8251 /*
8252 * Sometimes a second normalisation pass for spaces is needed
8253 * but that only happens if charrefs or entities refernces
8254 * have been used in the attribute value, i.e. the attribute
8255 * value have been extracted in an allocated string already.
8256 */
8257 if (*alloc) {
8258 const xmlChar *val2;
8259
8260 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8261 if (val2 != NULL) {
8262 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008263 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008264 }
8265 }
8266 }
8267 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008269 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8270 "Specification mandate value for attribute %s\n",
8271 name);
8272 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008273 }
8274
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008275 if (*prefix == ctxt->str_xml) {
8276 /*
8277 * Check that xml:lang conforms to the specification
8278 * No more registered as an error, just generate a warning now
8279 * since this was deprecated in XML second edition
8280 */
8281 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8282 internal_val = xmlStrndup(val, *len);
8283 if (!xmlCheckLanguageID(internal_val)) {
8284 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8285 "Malformed value for xml:lang : %s\n",
8286 internal_val, NULL);
8287 }
8288 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008289
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008290 /*
8291 * Check that xml:space conforms to the specification
8292 */
8293 if (xmlStrEqual(name, BAD_CAST "space")) {
8294 internal_val = xmlStrndup(val, *len);
8295 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8296 *(ctxt->space) = 0;
8297 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8298 *(ctxt->space) = 1;
8299 else {
8300 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8301 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8302 internal_val, NULL);
8303 }
8304 }
8305 if (internal_val) {
8306 xmlFree(internal_val);
8307 }
8308 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008309
8310 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008311 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008312}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008313/**
8314 * xmlParseStartTag2:
8315 * @ctxt: an XML parser context
8316 *
8317 * parse a start of tag either for rule element or
8318 * EmptyElement. In both case we don't parse the tag closing chars.
8319 * This routine is called when running SAX2 parsing
8320 *
8321 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8322 *
8323 * [ WFC: Unique Att Spec ]
8324 * No attribute name may appear more than once in the same start-tag or
8325 * empty-element tag.
8326 *
8327 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8328 *
8329 * [ WFC: Unique Att Spec ]
8330 * No attribute name may appear more than once in the same start-tag or
8331 * empty-element tag.
8332 *
8333 * With namespace:
8334 *
8335 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8336 *
8337 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8338 *
8339 * Returns the element name parsed
8340 */
8341
8342static const xmlChar *
8343xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008344 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008345 const xmlChar *localname;
8346 const xmlChar *prefix;
8347 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008348 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008349 const xmlChar *nsname;
8350 xmlChar *attvalue;
8351 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008352 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008353 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008354 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008355 const xmlChar *base;
8356 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008357 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008358
8359 if (RAW != '<') return(NULL);
8360 NEXT1;
8361
8362 /*
8363 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8364 * point since the attribute values may be stored as pointers to
8365 * the buffer and calling SHRINK would destroy them !
8366 * The Shrinking is only possible once the full set of attribute
8367 * callbacks have been done.
8368 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008369reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008370 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008371 base = ctxt->input->base;
8372 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008373 oldline = ctxt->input->line;
8374 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008375 nbatts = 0;
8376 nratts = 0;
8377 nbdef = 0;
8378 nbNs = 0;
8379 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008380 /* Forget any namespaces added during an earlier parse of this element. */
8381 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008382
8383 localname = xmlParseQName(ctxt, &prefix);
8384 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008385 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8386 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008387 return(NULL);
8388 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008389 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008390
8391 /*
8392 * Now parse the attributes, it ends up with the ending
8393 *
8394 * (S Attribute)* S?
8395 */
8396 SKIP_BLANKS;
8397 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008398 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008399
8400 while ((RAW != '>') &&
8401 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008402 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008403 const xmlChar *q = CUR_PTR;
8404 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008405 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008406
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008407 attname = xmlParseAttribute2(ctxt, prefix, localname,
8408 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008409 if (ctxt->input->base != base) {
8410 if ((attvalue != NULL) && (alloc != 0))
8411 xmlFree(attvalue);
8412 attvalue = NULL;
8413 goto base_changed;
8414 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008415 if ((attname != NULL) && (attvalue != NULL)) {
8416 if (len < 0) len = xmlStrlen(attvalue);
8417 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008418 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8419 xmlURIPtr uri;
8420
8421 if (*URL != 0) {
8422 uri = xmlParseURI((const char *) URL);
8423 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008424 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8425 "xmlns: %s not a valid URI\n",
8426 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008427 } else {
8428 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008429 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8430 "xmlns: URI %s is not absolute\n",
8431 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008432 }
8433 xmlFreeURI(uri);
8434 }
8435 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008436 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008437 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008439 for (j = 1;j <= nbNs;j++)
8440 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8441 break;
8442 if (j <= nbNs)
8443 xmlErrAttributeDup(ctxt, NULL, attname);
8444 else
8445 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008446 if (alloc != 0) xmlFree(attvalue);
8447 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008448 continue;
8449 }
8450 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008451 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8452 xmlURIPtr uri;
8453
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008454 if (attname == ctxt->str_xml) {
8455 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008456 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8457 "xml namespace prefix mapped to wrong URI\n",
8458 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008459 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008460 /*
8461 * Do not keep a namespace definition node
8462 */
8463 if (alloc != 0) xmlFree(attvalue);
8464 SKIP_BLANKS;
8465 continue;
8466 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008467 uri = xmlParseURI((const char *) URL);
8468 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008469 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8470 "xmlns:%s: '%s' is not a valid URI\n",
8471 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008472 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008473 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008474 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8475 "xmlns:%s: URI %s is not absolute\n",
8476 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008477 }
8478 xmlFreeURI(uri);
8479 }
8480
Daniel Veillard0fb18932003-09-07 09:14:37 +00008481 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008482 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008483 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008484 for (j = 1;j <= nbNs;j++)
8485 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8486 break;
8487 if (j <= nbNs)
8488 xmlErrAttributeDup(ctxt, aprefix, attname);
8489 else
8490 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008491 if (alloc != 0) xmlFree(attvalue);
8492 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008493 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494 continue;
8495 }
8496
8497 /*
8498 * Add the pair to atts
8499 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008500 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8501 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008502 if (attvalue[len] == 0)
8503 xmlFree(attvalue);
8504 goto failed;
8505 }
8506 maxatts = ctxt->maxatts;
8507 atts = ctxt->atts;
8508 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008509 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008510 atts[nbatts++] = attname;
8511 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008512 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008513 atts[nbatts++] = attvalue;
8514 attvalue += len;
8515 atts[nbatts++] = attvalue;
8516 /*
8517 * tag if some deallocation is needed
8518 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008519 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008520 } else {
8521 if ((attvalue != NULL) && (attvalue[len] == 0))
8522 xmlFree(attvalue);
8523 }
8524
8525failed:
8526
8527 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008528 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8530 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008531 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8533 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008534 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008535 }
8536 SKIP_BLANKS;
8537 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8538 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008539 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008540 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 break;
8542 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008544 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008545 }
8546
Daniel Veillard0fb18932003-09-07 09:14:37 +00008547 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008548 * The attributes defaulting
8549 */
8550 if (ctxt->attsDefault != NULL) {
8551 xmlDefAttrsPtr defaults;
8552
8553 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8554 if (defaults != NULL) {
8555 for (i = 0;i < defaults->nbAttrs;i++) {
8556 attname = defaults->values[4 * i];
8557 aprefix = defaults->values[4 * i + 1];
8558
8559 /*
8560 * special work for namespaces defaulted defs
8561 */
8562 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8563 /*
8564 * check that it's not a defined namespace
8565 */
8566 for (j = 1;j <= nbNs;j++)
8567 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8568 break;
8569 if (j <= nbNs) continue;
8570
8571 nsname = xmlGetNamespace(ctxt, NULL);
8572 if (nsname != defaults->values[4 * i + 2]) {
8573 if (nsPush(ctxt, NULL,
8574 defaults->values[4 * i + 2]) > 0)
8575 nbNs++;
8576 }
8577 } else if (aprefix == ctxt->str_xmlns) {
8578 /*
8579 * check that it's not a defined namespace
8580 */
8581 for (j = 1;j <= nbNs;j++)
8582 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8583 break;
8584 if (j <= nbNs) continue;
8585
8586 nsname = xmlGetNamespace(ctxt, attname);
8587 if (nsname != defaults->values[2]) {
8588 if (nsPush(ctxt, attname,
8589 defaults->values[4 * i + 2]) > 0)
8590 nbNs++;
8591 }
8592 } else {
8593 /*
8594 * check that it's not a defined attribute
8595 */
8596 for (j = 0;j < nbatts;j+=5) {
8597 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8598 break;
8599 }
8600 if (j < nbatts) continue;
8601
8602 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8603 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008604 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008605 }
8606 maxatts = ctxt->maxatts;
8607 atts = ctxt->atts;
8608 }
8609 atts[nbatts++] = attname;
8610 atts[nbatts++] = aprefix;
8611 if (aprefix == NULL)
8612 atts[nbatts++] = NULL;
8613 else
8614 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8615 atts[nbatts++] = defaults->values[4 * i + 2];
8616 atts[nbatts++] = defaults->values[4 * i + 3];
8617 nbdef++;
8618 }
8619 }
8620 }
8621 }
8622
Daniel Veillarde70c8772003-11-25 07:21:18 +00008623 /*
8624 * The attributes checkings
8625 */
8626 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008627 /*
8628 * The default namespace does not apply to attribute names.
8629 */
8630 if (atts[i + 1] != NULL) {
8631 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8632 if (nsname == NULL) {
8633 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8634 "Namespace prefix %s for %s on %s is not defined\n",
8635 atts[i + 1], atts[i], localname);
8636 }
8637 atts[i + 2] = nsname;
8638 } else
8639 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008640 /*
8641 * [ WFC: Unique Att Spec ]
8642 * No attribute name may appear more than once in the same
8643 * start-tag or empty-element tag.
8644 * As extended by the Namespace in XML REC.
8645 */
8646 for (j = 0; j < i;j += 5) {
8647 if (atts[i] == atts[j]) {
8648 if (atts[i+1] == atts[j+1]) {
8649 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8650 break;
8651 }
8652 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8653 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8654 "Namespaced Attribute %s in '%s' redefined\n",
8655 atts[i], nsname, NULL);
8656 break;
8657 }
8658 }
8659 }
8660 }
8661
Daniel Veillarde57ec792003-09-10 10:50:59 +00008662 nsname = xmlGetNamespace(ctxt, prefix);
8663 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008664 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8665 "Namespace prefix %s on %s is not defined\n",
8666 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008667 }
8668 *pref = prefix;
8669 *URI = nsname;
8670
8671 /*
8672 * SAX: Start of Element !
8673 */
8674 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8675 (!ctxt->disableSAX)) {
8676 if (nbNs > 0)
8677 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8678 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8679 nbatts / 5, nbdef, atts);
8680 else
8681 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8682 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8683 }
8684
8685 /*
8686 * Free up attribute allocated strings if needed
8687 */
8688 if (attval != 0) {
8689 for (i = 3,j = 0; j < nratts;i += 5,j++)
8690 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8691 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008692 }
8693
8694 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008695
8696base_changed:
8697 /*
8698 * the attribute strings are valid iif the base didn't changed
8699 */
8700 if (attval != 0) {
8701 for (i = 3,j = 0; j < nratts;i += 5,j++)
8702 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8703 xmlFree((xmlChar *) atts[i]);
8704 }
8705 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008706 ctxt->input->line = oldline;
8707 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 if (ctxt->wellFormed == 1) {
8709 goto reparse;
8710 }
8711 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008712}
8713
8714/**
8715 * xmlParseEndTag2:
8716 * @ctxt: an XML parser context
8717 * @line: line of the start tag
8718 * @nsNr: number of namespaces on the start tag
8719 *
8720 * parse an end of tag
8721 *
8722 * [42] ETag ::= '</' Name S? '>'
8723 *
8724 * With namespace
8725 *
8726 * [NS 9] ETag ::= '</' QName S? '>'
8727 */
8728
8729static void
8730xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008731 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008732 const xmlChar *name;
8733
8734 GROW;
8735 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008736 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737 return;
8738 }
8739 SKIP(2);
8740
William M. Brack13dfa872004-09-18 04:52:08 +00008741 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008742 if (ctxt->input->cur[tlen] == '>') {
8743 ctxt->input->cur += tlen + 1;
8744 goto done;
8745 }
8746 ctxt->input->cur += tlen;
8747 name = (xmlChar*)1;
8748 } else {
8749 if (prefix == NULL)
8750 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8751 else
8752 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8753 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008754
8755 /*
8756 * We should definitely be at the ending "S? '>'" part
8757 */
8758 GROW;
8759 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008760 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008761 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762 } else
8763 NEXT1;
8764
8765 /*
8766 * [ WFC: Element Type Match ]
8767 * The Name in an element's end-tag must match the element type in the
8768 * start-tag.
8769 *
8770 */
8771 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008772 if (name == NULL) name = BAD_CAST "unparseable";
8773 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008774 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008775 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008776 }
8777
8778 /*
8779 * SAX: End of Tag
8780 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008781done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8783 (!ctxt->disableSAX))
8784 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8785
Daniel Veillard0fb18932003-09-07 09:14:37 +00008786 spacePop(ctxt);
8787 if (nsNr != 0)
8788 nsPop(ctxt, nsNr);
8789 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008790}
8791
8792/**
Owen Taylor3473f882001-02-23 17:55:21 +00008793 * xmlParseCDSect:
8794 * @ctxt: an XML parser context
8795 *
8796 * Parse escaped pure raw content.
8797 *
8798 * [18] CDSect ::= CDStart CData CDEnd
8799 *
8800 * [19] CDStart ::= '<![CDATA['
8801 *
8802 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8803 *
8804 * [21] CDEnd ::= ']]>'
8805 */
8806void
8807xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8808 xmlChar *buf = NULL;
8809 int len = 0;
8810 int size = XML_PARSER_BUFFER_SIZE;
8811 int r, rl;
8812 int s, sl;
8813 int cur, l;
8814 int count = 0;
8815
Daniel Veillard8f597c32003-10-06 08:19:27 +00008816 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008817 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008818 SKIP(9);
8819 } else
8820 return;
8821
8822 ctxt->instate = XML_PARSER_CDATA_SECTION;
8823 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008824 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008825 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008826 ctxt->instate = XML_PARSER_CONTENT;
8827 return;
8828 }
8829 NEXTL(rl);
8830 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008831 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008832 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008833 ctxt->instate = XML_PARSER_CONTENT;
8834 return;
8835 }
8836 NEXTL(sl);
8837 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008838 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008839 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008840 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008841 return;
8842 }
William M. Brack871611b2003-10-18 04:53:14 +00008843 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008844 ((r != ']') || (s != ']') || (cur != '>'))) {
8845 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008846 xmlChar *tmp;
8847
Owen Taylor3473f882001-02-23 17:55:21 +00008848 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008849 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8850 if (tmp == NULL) {
8851 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008852 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008853 return;
8854 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008855 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008856 }
8857 COPY_BUF(rl,buf,len,r);
8858 r = s;
8859 rl = sl;
8860 s = cur;
8861 sl = l;
8862 count++;
8863 if (count > 50) {
8864 GROW;
8865 count = 0;
8866 }
8867 NEXTL(l);
8868 cur = CUR_CHAR(l);
8869 }
8870 buf[len] = 0;
8871 ctxt->instate = XML_PARSER_CONTENT;
8872 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008873 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008874 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008875 xmlFree(buf);
8876 return;
8877 }
8878 NEXTL(l);
8879
8880 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008881 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008882 */
8883 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8884 if (ctxt->sax->cdataBlock != NULL)
8885 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008886 else if (ctxt->sax->characters != NULL)
8887 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008888 }
8889 xmlFree(buf);
8890}
8891
8892/**
8893 * xmlParseContent:
8894 * @ctxt: an XML parser context
8895 *
8896 * Parse a content:
8897 *
8898 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8899 */
8900
8901void
8902xmlParseContent(xmlParserCtxtPtr ctxt) {
8903 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008904 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008905 ((RAW != '<') || (NXT(1) != '/')) &&
8906 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008907 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008908 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008909 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008910
8911 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008912 * First case : a Processing Instruction.
8913 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008914 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008915 xmlParsePI(ctxt);
8916 }
8917
8918 /*
8919 * Second case : a CDSection
8920 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008921 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008922 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008923 xmlParseCDSect(ctxt);
8924 }
8925
8926 /*
8927 * Third case : a comment
8928 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008929 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008930 (NXT(2) == '-') && (NXT(3) == '-')) {
8931 xmlParseComment(ctxt);
8932 ctxt->instate = XML_PARSER_CONTENT;
8933 }
8934
8935 /*
8936 * Fourth case : a sub-element.
8937 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008938 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008939 xmlParseElement(ctxt);
8940 }
8941
8942 /*
8943 * Fifth case : a reference. If if has not been resolved,
8944 * parsing returns it's Name, create the node
8945 */
8946
Daniel Veillard21a0f912001-02-25 19:54:14 +00008947 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008948 xmlParseReference(ctxt);
8949 }
8950
8951 /*
8952 * Last case, text. Note that References are handled directly.
8953 */
8954 else {
8955 xmlParseCharData(ctxt, 0);
8956 }
8957
8958 GROW;
8959 /*
8960 * Pop-up of finished entities.
8961 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008962 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008963 xmlPopInput(ctxt);
8964 SHRINK;
8965
Daniel Veillardfdc91562002-07-01 21:52:03 +00008966 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008967 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8968 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008969 ctxt->instate = XML_PARSER_EOF;
8970 break;
8971 }
8972 }
8973}
8974
8975/**
8976 * xmlParseElement:
8977 * @ctxt: an XML parser context
8978 *
8979 * parse an XML element, this is highly recursive
8980 *
8981 * [39] element ::= EmptyElemTag | STag content ETag
8982 *
8983 * [ WFC: Element Type Match ]
8984 * The Name in an element's end-tag must match the element type in the
8985 * start-tag.
8986 *
Owen Taylor3473f882001-02-23 17:55:21 +00008987 */
8988
8989void
8990xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008991 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008992 const xmlChar *prefix;
8993 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008994 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008995 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008996 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008997 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008998
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008999 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
9000 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9001 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
9002 xmlParserMaxDepth);
9003 ctxt->instate = XML_PARSER_EOF;
9004 return;
9005 }
9006
Owen Taylor3473f882001-02-23 17:55:21 +00009007 /* Capture start position */
9008 if (ctxt->record_info) {
9009 node_info.begin_pos = ctxt->input->consumed +
9010 (CUR_PTR - ctxt->input->base);
9011 node_info.begin_line = ctxt->input->line;
9012 }
9013
9014 if (ctxt->spaceNr == 0)
9015 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009016 else if (*ctxt->space == -2)
9017 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009018 else
9019 spacePush(ctxt, *ctxt->space);
9020
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009021 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009022#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009023 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009024#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009025 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009026#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009027 else
9028 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009029#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009030 if (name == NULL) {
9031 spacePop(ctxt);
9032 return;
9033 }
9034 namePush(ctxt, name);
9035 ret = ctxt->node;
9036
Daniel Veillard4432df22003-09-28 18:58:27 +00009037#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009038 /*
9039 * [ VC: Root Element Type ]
9040 * The Name in the document type declaration must match the element
9041 * type of the root element.
9042 */
9043 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9044 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9045 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009046#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009047
9048 /*
9049 * Check for an Empty Element.
9050 */
9051 if ((RAW == '/') && (NXT(1) == '>')) {
9052 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009053 if (ctxt->sax2) {
9054 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9055 (!ctxt->disableSAX))
9056 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009057#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058 } else {
9059 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9060 (!ctxt->disableSAX))
9061 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009062#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009063 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009064 namePop(ctxt);
9065 spacePop(ctxt);
9066 if (nsNr != ctxt->nsNr)
9067 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009068 if ( ret != NULL && ctxt->record_info ) {
9069 node_info.end_pos = ctxt->input->consumed +
9070 (CUR_PTR - ctxt->input->base);
9071 node_info.end_line = ctxt->input->line;
9072 node_info.node = ret;
9073 xmlParserAddNodeInfo(ctxt, &node_info);
9074 }
9075 return;
9076 }
9077 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009078 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009079 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009080 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9081 "Couldn't find end of Start Tag %s line %d\n",
9082 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009083
9084 /*
9085 * end of parsing of this node.
9086 */
9087 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009088 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009089 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009090 if (nsNr != ctxt->nsNr)
9091 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009092
9093 /*
9094 * Capture end position and add node
9095 */
9096 if ( ret != NULL && ctxt->record_info ) {
9097 node_info.end_pos = ctxt->input->consumed +
9098 (CUR_PTR - ctxt->input->base);
9099 node_info.end_line = ctxt->input->line;
9100 node_info.node = ret;
9101 xmlParserAddNodeInfo(ctxt, &node_info);
9102 }
9103 return;
9104 }
9105
9106 /*
9107 * Parse the content of the element:
9108 */
9109 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009110 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009111 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009112 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009113 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009114
9115 /*
9116 * end of parsing of this node.
9117 */
9118 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009119 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009120 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009121 if (nsNr != ctxt->nsNr)
9122 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009123 return;
9124 }
9125
9126 /*
9127 * parse the end of tag: '</' should be here.
9128 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009129 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009130 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009131 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009132 }
9133#ifdef LIBXML_SAX1_ENABLED
9134 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009135 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009136#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009137
9138 /*
9139 * Capture end position and add node
9140 */
9141 if ( ret != NULL && ctxt->record_info ) {
9142 node_info.end_pos = ctxt->input->consumed +
9143 (CUR_PTR - ctxt->input->base);
9144 node_info.end_line = ctxt->input->line;
9145 node_info.node = ret;
9146 xmlParserAddNodeInfo(ctxt, &node_info);
9147 }
9148}
9149
9150/**
9151 * xmlParseVersionNum:
9152 * @ctxt: an XML parser context
9153 *
9154 * parse the XML version value.
9155 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009156 * [26] VersionNum ::= '1.' [0-9]+
9157 *
9158 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009159 *
9160 * Returns the string giving the XML version number, or NULL
9161 */
9162xmlChar *
9163xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9164 xmlChar *buf = NULL;
9165 int len = 0;
9166 int size = 10;
9167 xmlChar cur;
9168
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009169 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009170 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009171 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009172 return(NULL);
9173 }
9174 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009175 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009176 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009177 return(NULL);
9178 }
9179 buf[len++] = cur;
9180 NEXT;
9181 cur=CUR;
9182 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009183 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009184 return(NULL);
9185 }
9186 buf[len++] = cur;
9187 NEXT;
9188 cur=CUR;
9189 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009190 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009191 xmlChar *tmp;
9192
Owen Taylor3473f882001-02-23 17:55:21 +00009193 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009194 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9195 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009196 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009198 return(NULL);
9199 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009200 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009201 }
9202 buf[len++] = cur;
9203 NEXT;
9204 cur=CUR;
9205 }
9206 buf[len] = 0;
9207 return(buf);
9208}
9209
9210/**
9211 * xmlParseVersionInfo:
9212 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009213 *
Owen Taylor3473f882001-02-23 17:55:21 +00009214 * parse the XML version.
9215 *
9216 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009217 *
Owen Taylor3473f882001-02-23 17:55:21 +00009218 * [25] Eq ::= S? '=' S?
9219 *
9220 * Returns the version string, e.g. "1.0"
9221 */
9222
9223xmlChar *
9224xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9225 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009226
Daniel Veillarda07050d2003-10-19 14:46:32 +00009227 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009228 SKIP(7);
9229 SKIP_BLANKS;
9230 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009231 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009232 return(NULL);
9233 }
9234 NEXT;
9235 SKIP_BLANKS;
9236 if (RAW == '"') {
9237 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009238 version = xmlParseVersionNum(ctxt);
9239 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009240 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009241 } else
9242 NEXT;
9243 } else if (RAW == '\''){
9244 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009245 version = xmlParseVersionNum(ctxt);
9246 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009247 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009248 } else
9249 NEXT;
9250 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009251 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009252 }
9253 }
9254 return(version);
9255}
9256
9257/**
9258 * xmlParseEncName:
9259 * @ctxt: an XML parser context
9260 *
9261 * parse the XML encoding name
9262 *
9263 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9264 *
9265 * Returns the encoding name value or NULL
9266 */
9267xmlChar *
9268xmlParseEncName(xmlParserCtxtPtr ctxt) {
9269 xmlChar *buf = NULL;
9270 int len = 0;
9271 int size = 10;
9272 xmlChar cur;
9273
9274 cur = CUR;
9275 if (((cur >= 'a') && (cur <= 'z')) ||
9276 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009277 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009278 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009279 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009280 return(NULL);
9281 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009282
Owen Taylor3473f882001-02-23 17:55:21 +00009283 buf[len++] = cur;
9284 NEXT;
9285 cur = CUR;
9286 while (((cur >= 'a') && (cur <= 'z')) ||
9287 ((cur >= 'A') && (cur <= 'Z')) ||
9288 ((cur >= '0') && (cur <= '9')) ||
9289 (cur == '.') || (cur == '_') ||
9290 (cur == '-')) {
9291 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009292 xmlChar *tmp;
9293
Owen Taylor3473f882001-02-23 17:55:21 +00009294 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009295 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9296 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009297 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009298 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009299 return(NULL);
9300 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009301 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009302 }
9303 buf[len++] = cur;
9304 NEXT;
9305 cur = CUR;
9306 if (cur == 0) {
9307 SHRINK;
9308 GROW;
9309 cur = CUR;
9310 }
9311 }
9312 buf[len] = 0;
9313 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009314 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009315 }
9316 return(buf);
9317}
9318
9319/**
9320 * xmlParseEncodingDecl:
9321 * @ctxt: an XML parser context
9322 *
9323 * parse the XML encoding declaration
9324 *
9325 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9326 *
9327 * this setups the conversion filters.
9328 *
9329 * Returns the encoding value or NULL
9330 */
9331
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009332const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009333xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9334 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009335
9336 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009337 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009338 SKIP(8);
9339 SKIP_BLANKS;
9340 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009341 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009342 return(NULL);
9343 }
9344 NEXT;
9345 SKIP_BLANKS;
9346 if (RAW == '"') {
9347 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009348 encoding = xmlParseEncName(ctxt);
9349 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009350 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009351 } else
9352 NEXT;
9353 } else if (RAW == '\''){
9354 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009355 encoding = xmlParseEncName(ctxt);
9356 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009357 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009358 } else
9359 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009360 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009361 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009362 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009363 /*
9364 * UTF-16 encoding stwich has already taken place at this stage,
9365 * more over the little-endian/big-endian selection is already done
9366 */
9367 if ((encoding != NULL) &&
9368 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9369 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009370 if (ctxt->encoding != NULL)
9371 xmlFree((xmlChar *) ctxt->encoding);
9372 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009373 }
9374 /*
9375 * UTF-8 encoding is handled natively
9376 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009377 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009378 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9379 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009380 if (ctxt->encoding != NULL)
9381 xmlFree((xmlChar *) ctxt->encoding);
9382 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009383 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009384 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009385 xmlCharEncodingHandlerPtr handler;
9386
9387 if (ctxt->input->encoding != NULL)
9388 xmlFree((xmlChar *) ctxt->input->encoding);
9389 ctxt->input->encoding = encoding;
9390
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009391 handler = xmlFindCharEncodingHandler((const char *) encoding);
9392 if (handler != NULL) {
9393 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009394 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009395 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009396 "Unsupported encoding %s\n", encoding);
9397 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009398 }
9399 }
9400 }
9401 return(encoding);
9402}
9403
9404/**
9405 * xmlParseSDDecl:
9406 * @ctxt: an XML parser context
9407 *
9408 * parse the XML standalone declaration
9409 *
9410 * [32] SDDecl ::= S 'standalone' Eq
9411 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9412 *
9413 * [ VC: Standalone Document Declaration ]
9414 * TODO The standalone document declaration must have the value "no"
9415 * if any external markup declarations contain declarations of:
9416 * - attributes with default values, if elements to which these
9417 * attributes apply appear in the document without specifications
9418 * of values for these attributes, or
9419 * - entities (other than amp, lt, gt, apos, quot), if references
9420 * to those entities appear in the document, or
9421 * - attributes with values subject to normalization, where the
9422 * attribute appears in the document with a value which will change
9423 * as a result of normalization, or
9424 * - element types with element content, if white space occurs directly
9425 * within any instance of those types.
9426 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009427 * Returns:
9428 * 1 if standalone="yes"
9429 * 0 if standalone="no"
9430 * -2 if standalone attribute is missing or invalid
9431 * (A standalone value of -2 means that the XML declaration was found,
9432 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009433 */
9434
9435int
9436xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009437 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009438
9439 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009440 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009441 SKIP(10);
9442 SKIP_BLANKS;
9443 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009444 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009445 return(standalone);
9446 }
9447 NEXT;
9448 SKIP_BLANKS;
9449 if (RAW == '\''){
9450 NEXT;
9451 if ((RAW == 'n') && (NXT(1) == 'o')) {
9452 standalone = 0;
9453 SKIP(2);
9454 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9455 (NXT(2) == 's')) {
9456 standalone = 1;
9457 SKIP(3);
9458 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009459 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009460 }
9461 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009462 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009463 } else
9464 NEXT;
9465 } else if (RAW == '"'){
9466 NEXT;
9467 if ((RAW == 'n') && (NXT(1) == 'o')) {
9468 standalone = 0;
9469 SKIP(2);
9470 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9471 (NXT(2) == 's')) {
9472 standalone = 1;
9473 SKIP(3);
9474 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009475 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009476 }
9477 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009478 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009479 } else
9480 NEXT;
9481 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009482 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009483 }
9484 }
9485 return(standalone);
9486}
9487
9488/**
9489 * xmlParseXMLDecl:
9490 * @ctxt: an XML parser context
9491 *
9492 * parse an XML declaration header
9493 *
9494 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9495 */
9496
9497void
9498xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9499 xmlChar *version;
9500
9501 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009502 * This value for standalone indicates that the document has an
9503 * XML declaration but it does not have a standalone attribute.
9504 * It will be overwritten later if a standalone attribute is found.
9505 */
9506 ctxt->input->standalone = -2;
9507
9508 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009509 * We know that '<?xml' is here.
9510 */
9511 SKIP(5);
9512
William M. Brack76e95df2003-10-18 16:20:14 +00009513 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9515 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009516 }
9517 SKIP_BLANKS;
9518
9519 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009520 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009521 */
9522 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009523 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009524 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009525 } else {
9526 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9527 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009528 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009529 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009530 if (ctxt->options & XML_PARSE_OLD10) {
9531 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9532 "Unsupported version '%s'\n",
9533 version);
9534 } else {
9535 if ((version[0] == '1') && ((version[1] == '.'))) {
9536 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9537 "Unsupported version '%s'\n",
9538 version, NULL);
9539 } else {
9540 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9541 "Unsupported version '%s'\n",
9542 version);
9543 }
9544 }
Daniel Veillard19840942001-11-29 16:11:38 +00009545 }
9546 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009547 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009548 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009549 }
Owen Taylor3473f882001-02-23 17:55:21 +00009550
9551 /*
9552 * We may have the encoding declaration
9553 */
William M. Brack76e95df2003-10-18 16:20:14 +00009554 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009555 if ((RAW == '?') && (NXT(1) == '>')) {
9556 SKIP(2);
9557 return;
9558 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009560 }
9561 xmlParseEncodingDecl(ctxt);
9562 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9563 /*
9564 * The XML REC instructs us to stop parsing right here
9565 */
9566 return;
9567 }
9568
9569 /*
9570 * We may have the standalone status.
9571 */
William M. Brack76e95df2003-10-18 16:20:14 +00009572 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009573 if ((RAW == '?') && (NXT(1) == '>')) {
9574 SKIP(2);
9575 return;
9576 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009578 }
9579 SKIP_BLANKS;
9580 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9581
9582 SKIP_BLANKS;
9583 if ((RAW == '?') && (NXT(1) == '>')) {
9584 SKIP(2);
9585 } else if (RAW == '>') {
9586 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009587 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009588 NEXT;
9589 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009590 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009591 MOVETO_ENDTAG(CUR_PTR);
9592 NEXT;
9593 }
9594}
9595
9596/**
9597 * xmlParseMisc:
9598 * @ctxt: an XML parser context
9599 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009600 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009601 *
9602 * [27] Misc ::= Comment | PI | S
9603 */
9604
9605void
9606xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009607 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009608 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009609 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009610 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009611 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009612 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009613 NEXT;
9614 } else
9615 xmlParseComment(ctxt);
9616 }
9617}
9618
9619/**
9620 * xmlParseDocument:
9621 * @ctxt: an XML parser context
9622 *
9623 * parse an XML document (and build a tree if using the standard SAX
9624 * interface).
9625 *
9626 * [1] document ::= prolog element Misc*
9627 *
9628 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9629 *
9630 * Returns 0, -1 in case of error. the parser context is augmented
9631 * as a result of the parsing.
9632 */
9633
9634int
9635xmlParseDocument(xmlParserCtxtPtr ctxt) {
9636 xmlChar start[4];
9637 xmlCharEncoding enc;
9638
9639 xmlInitParser();
9640
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009641 if ((ctxt == NULL) || (ctxt->input == NULL))
9642 return(-1);
9643
Owen Taylor3473f882001-02-23 17:55:21 +00009644 GROW;
9645
9646 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009647 * SAX: detecting the level.
9648 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009649 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009650
9651 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009652 * SAX: beginning of the document processing.
9653 */
9654 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9655 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9656
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009657 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9658 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009659 /*
9660 * Get the 4 first bytes and decode the charset
9661 * if enc != XML_CHAR_ENCODING_NONE
9662 * plug some encoding conversion routines.
9663 */
9664 start[0] = RAW;
9665 start[1] = NXT(1);
9666 start[2] = NXT(2);
9667 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009668 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009669 if (enc != XML_CHAR_ENCODING_NONE) {
9670 xmlSwitchEncoding(ctxt, enc);
9671 }
Owen Taylor3473f882001-02-23 17:55:21 +00009672 }
9673
9674
9675 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009676 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009677 }
9678
9679 /*
9680 * Check for the XMLDecl in the Prolog.
9681 */
9682 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009683 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009684
9685 /*
9686 * Note that we will switch encoding on the fly.
9687 */
9688 xmlParseXMLDecl(ctxt);
9689 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9690 /*
9691 * The XML REC instructs us to stop parsing right here
9692 */
9693 return(-1);
9694 }
9695 ctxt->standalone = ctxt->input->standalone;
9696 SKIP_BLANKS;
9697 } else {
9698 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9699 }
9700 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9701 ctxt->sax->startDocument(ctxt->userData);
9702
9703 /*
9704 * The Misc part of the Prolog
9705 */
9706 GROW;
9707 xmlParseMisc(ctxt);
9708
9709 /*
9710 * Then possibly doc type declaration(s) and more Misc
9711 * (doctypedecl Misc*)?
9712 */
9713 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009714 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009715
9716 ctxt->inSubset = 1;
9717 xmlParseDocTypeDecl(ctxt);
9718 if (RAW == '[') {
9719 ctxt->instate = XML_PARSER_DTD;
9720 xmlParseInternalSubset(ctxt);
9721 }
9722
9723 /*
9724 * Create and update the external subset.
9725 */
9726 ctxt->inSubset = 2;
9727 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9728 (!ctxt->disableSAX))
9729 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9730 ctxt->extSubSystem, ctxt->extSubURI);
9731 ctxt->inSubset = 0;
9732
Daniel Veillardac4118d2008-01-11 05:27:32 +00009733 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009734
9735 ctxt->instate = XML_PARSER_PROLOG;
9736 xmlParseMisc(ctxt);
9737 }
9738
9739 /*
9740 * Time to start parsing the tree itself
9741 */
9742 GROW;
9743 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009744 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9745 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009746 } else {
9747 ctxt->instate = XML_PARSER_CONTENT;
9748 xmlParseElement(ctxt);
9749 ctxt->instate = XML_PARSER_EPILOG;
9750
9751
9752 /*
9753 * The Misc part at the end
9754 */
9755 xmlParseMisc(ctxt);
9756
Daniel Veillard561b7f82002-03-20 21:55:57 +00009757 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009758 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009759 }
9760 ctxt->instate = XML_PARSER_EOF;
9761 }
9762
9763 /*
9764 * SAX: end of the document processing.
9765 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009766 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009767 ctxt->sax->endDocument(ctxt->userData);
9768
Daniel Veillard5997aca2002-03-18 18:36:20 +00009769 /*
9770 * Remove locally kept entity definitions if the tree was not built
9771 */
9772 if ((ctxt->myDoc != NULL) &&
9773 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9774 xmlFreeDoc(ctxt->myDoc);
9775 ctxt->myDoc = NULL;
9776 }
9777
Daniel Veillardc7612992002-02-17 22:47:37 +00009778 if (! ctxt->wellFormed) {
9779 ctxt->valid = 0;
9780 return(-1);
9781 }
Owen Taylor3473f882001-02-23 17:55:21 +00009782 return(0);
9783}
9784
9785/**
9786 * xmlParseExtParsedEnt:
9787 * @ctxt: an XML parser context
9788 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009789 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009790 * An external general parsed entity is well-formed if it matches the
9791 * production labeled extParsedEnt.
9792 *
9793 * [78] extParsedEnt ::= TextDecl? content
9794 *
9795 * Returns 0, -1 in case of error. the parser context is augmented
9796 * as a result of the parsing.
9797 */
9798
9799int
9800xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9801 xmlChar start[4];
9802 xmlCharEncoding enc;
9803
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009804 if ((ctxt == NULL) || (ctxt->input == NULL))
9805 return(-1);
9806
Owen Taylor3473f882001-02-23 17:55:21 +00009807 xmlDefaultSAXHandlerInit();
9808
Daniel Veillard309f81d2003-09-23 09:02:53 +00009809 xmlDetectSAX2(ctxt);
9810
Owen Taylor3473f882001-02-23 17:55:21 +00009811 GROW;
9812
9813 /*
9814 * SAX: beginning of the document processing.
9815 */
9816 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9817 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9818
9819 /*
9820 * Get the 4 first bytes and decode the charset
9821 * if enc != XML_CHAR_ENCODING_NONE
9822 * plug some encoding conversion routines.
9823 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009824 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9825 start[0] = RAW;
9826 start[1] = NXT(1);
9827 start[2] = NXT(2);
9828 start[3] = NXT(3);
9829 enc = xmlDetectCharEncoding(start, 4);
9830 if (enc != XML_CHAR_ENCODING_NONE) {
9831 xmlSwitchEncoding(ctxt, enc);
9832 }
Owen Taylor3473f882001-02-23 17:55:21 +00009833 }
9834
9835
9836 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009837 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009838 }
9839
9840 /*
9841 * Check for the XMLDecl in the Prolog.
9842 */
9843 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009844 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009845
9846 /*
9847 * Note that we will switch encoding on the fly.
9848 */
9849 xmlParseXMLDecl(ctxt);
9850 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9851 /*
9852 * The XML REC instructs us to stop parsing right here
9853 */
9854 return(-1);
9855 }
9856 SKIP_BLANKS;
9857 } else {
9858 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9859 }
9860 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9861 ctxt->sax->startDocument(ctxt->userData);
9862
9863 /*
9864 * Doing validity checking on chunk doesn't make sense
9865 */
9866 ctxt->instate = XML_PARSER_CONTENT;
9867 ctxt->validate = 0;
9868 ctxt->loadsubset = 0;
9869 ctxt->depth = 0;
9870
9871 xmlParseContent(ctxt);
9872
9873 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009874 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009875 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009876 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009877 }
9878
9879 /*
9880 * SAX: end of the document processing.
9881 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009882 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009883 ctxt->sax->endDocument(ctxt->userData);
9884
9885 if (! ctxt->wellFormed) return(-1);
9886 return(0);
9887}
9888
Daniel Veillard73b013f2003-09-30 12:36:01 +00009889#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009890/************************************************************************
9891 * *
9892 * Progressive parsing interfaces *
9893 * *
9894 ************************************************************************/
9895
9896/**
9897 * xmlParseLookupSequence:
9898 * @ctxt: an XML parser context
9899 * @first: the first char to lookup
9900 * @next: the next char to lookup or zero
9901 * @third: the next char to lookup or zero
9902 *
9903 * Try to find if a sequence (first, next, third) or just (first next) or
9904 * (first) is available in the input stream.
9905 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9906 * to avoid rescanning sequences of bytes, it DOES change the state of the
9907 * parser, do not use liberally.
9908 *
9909 * Returns the index to the current parsing point if the full sequence
9910 * is available, -1 otherwise.
9911 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009912static int
Owen Taylor3473f882001-02-23 17:55:21 +00009913xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9914 xmlChar next, xmlChar third) {
9915 int base, len;
9916 xmlParserInputPtr in;
9917 const xmlChar *buf;
9918
9919 in = ctxt->input;
9920 if (in == NULL) return(-1);
9921 base = in->cur - in->base;
9922 if (base < 0) return(-1);
9923 if (ctxt->checkIndex > base)
9924 base = ctxt->checkIndex;
9925 if (in->buf == NULL) {
9926 buf = in->base;
9927 len = in->length;
9928 } else {
9929 buf = in->buf->buffer->content;
9930 len = in->buf->buffer->use;
9931 }
9932 /* take into account the sequence length */
9933 if (third) len -= 2;
9934 else if (next) len --;
9935 for (;base < len;base++) {
9936 if (buf[base] == first) {
9937 if (third != 0) {
9938 if ((buf[base + 1] != next) ||
9939 (buf[base + 2] != third)) continue;
9940 } else if (next != 0) {
9941 if (buf[base + 1] != next) continue;
9942 }
9943 ctxt->checkIndex = 0;
9944#ifdef DEBUG_PUSH
9945 if (next == 0)
9946 xmlGenericError(xmlGenericErrorContext,
9947 "PP: lookup '%c' found at %d\n",
9948 first, base);
9949 else if (third == 0)
9950 xmlGenericError(xmlGenericErrorContext,
9951 "PP: lookup '%c%c' found at %d\n",
9952 first, next, base);
9953 else
9954 xmlGenericError(xmlGenericErrorContext,
9955 "PP: lookup '%c%c%c' found at %d\n",
9956 first, next, third, base);
9957#endif
9958 return(base - (in->cur - in->base));
9959 }
9960 }
9961 ctxt->checkIndex = base;
9962#ifdef DEBUG_PUSH
9963 if (next == 0)
9964 xmlGenericError(xmlGenericErrorContext,
9965 "PP: lookup '%c' failed\n", first);
9966 else if (third == 0)
9967 xmlGenericError(xmlGenericErrorContext,
9968 "PP: lookup '%c%c' failed\n", first, next);
9969 else
9970 xmlGenericError(xmlGenericErrorContext,
9971 "PP: lookup '%c%c%c' failed\n", first, next, third);
9972#endif
9973 return(-1);
9974}
9975
9976/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009977 * xmlParseGetLasts:
9978 * @ctxt: an XML parser context
9979 * @lastlt: pointer to store the last '<' from the input
9980 * @lastgt: pointer to store the last '>' from the input
9981 *
9982 * Lookup the last < and > in the current chunk
9983 */
9984static void
9985xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9986 const xmlChar **lastgt) {
9987 const xmlChar *tmp;
9988
9989 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9990 xmlGenericError(xmlGenericErrorContext,
9991 "Internal error: xmlParseGetLasts\n");
9992 return;
9993 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009994 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009995 tmp = ctxt->input->end;
9996 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009997 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009998 if (tmp < ctxt->input->base) {
9999 *lastlt = NULL;
10000 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010001 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010002 *lastlt = tmp;
10003 tmp++;
10004 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10005 if (*tmp == '\'') {
10006 tmp++;
10007 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10008 if (tmp < ctxt->input->end) tmp++;
10009 } else if (*tmp == '"') {
10010 tmp++;
10011 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10012 if (tmp < ctxt->input->end) tmp++;
10013 } else
10014 tmp++;
10015 }
10016 if (tmp < ctxt->input->end)
10017 *lastgt = tmp;
10018 else {
10019 tmp = *lastlt;
10020 tmp--;
10021 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10022 if (tmp >= ctxt->input->base)
10023 *lastgt = tmp;
10024 else
10025 *lastgt = NULL;
10026 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010027 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010028 } else {
10029 *lastlt = NULL;
10030 *lastgt = NULL;
10031 }
10032}
10033/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010034 * xmlCheckCdataPush:
10035 * @cur: pointer to the bock of characters
10036 * @len: length of the block in bytes
10037 *
10038 * Check that the block of characters is okay as SCdata content [20]
10039 *
10040 * Returns the number of bytes to pass if okay, a negative index where an
10041 * UTF-8 error occured otherwise
10042 */
10043static int
10044xmlCheckCdataPush(const xmlChar *utf, int len) {
10045 int ix;
10046 unsigned char c;
10047 int codepoint;
10048
10049 if ((utf == NULL) || (len <= 0))
10050 return(0);
10051
10052 for (ix = 0; ix < len;) { /* string is 0-terminated */
10053 c = utf[ix];
10054 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10055 if (c >= 0x20)
10056 ix++;
10057 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10058 ix++;
10059 else
10060 return(-ix);
10061 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10062 if (ix + 2 > len) return(ix);
10063 if ((utf[ix+1] & 0xc0 ) != 0x80)
10064 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010065 codepoint = (utf[ix] & 0x1f) << 6;
10066 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010067 if (!xmlIsCharQ(codepoint))
10068 return(-ix);
10069 ix += 2;
10070 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10071 if (ix + 3 > len) return(ix);
10072 if (((utf[ix+1] & 0xc0) != 0x80) ||
10073 ((utf[ix+2] & 0xc0) != 0x80))
10074 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010075 codepoint = (utf[ix] & 0xf) << 12;
10076 codepoint |= (utf[ix+1] & 0x3f) << 6;
10077 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010078 if (!xmlIsCharQ(codepoint))
10079 return(-ix);
10080 ix += 3;
10081 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10082 if (ix + 4 > len) return(ix);
10083 if (((utf[ix+1] & 0xc0) != 0x80) ||
10084 ((utf[ix+2] & 0xc0) != 0x80) ||
10085 ((utf[ix+3] & 0xc0) != 0x80))
10086 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010087 codepoint = (utf[ix] & 0x7) << 18;
10088 codepoint |= (utf[ix+1] & 0x3f) << 12;
10089 codepoint |= (utf[ix+2] & 0x3f) << 6;
10090 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010091 if (!xmlIsCharQ(codepoint))
10092 return(-ix);
10093 ix += 4;
10094 } else /* unknown encoding */
10095 return(-ix);
10096 }
10097 return(ix);
10098}
10099
10100/**
Owen Taylor3473f882001-02-23 17:55:21 +000010101 * xmlParseTryOrFinish:
10102 * @ctxt: an XML parser context
10103 * @terminate: last chunk indicator
10104 *
10105 * Try to progress on parsing
10106 *
10107 * Returns zero if no parsing was possible
10108 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010109static int
Owen Taylor3473f882001-02-23 17:55:21 +000010110xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10111 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010112 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010113 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010114 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010115
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010116 if (ctxt->input == NULL)
10117 return(0);
10118
Owen Taylor3473f882001-02-23 17:55:21 +000010119#ifdef DEBUG_PUSH
10120 switch (ctxt->instate) {
10121 case XML_PARSER_EOF:
10122 xmlGenericError(xmlGenericErrorContext,
10123 "PP: try EOF\n"); break;
10124 case XML_PARSER_START:
10125 xmlGenericError(xmlGenericErrorContext,
10126 "PP: try START\n"); break;
10127 case XML_PARSER_MISC:
10128 xmlGenericError(xmlGenericErrorContext,
10129 "PP: try MISC\n");break;
10130 case XML_PARSER_COMMENT:
10131 xmlGenericError(xmlGenericErrorContext,
10132 "PP: try COMMENT\n");break;
10133 case XML_PARSER_PROLOG:
10134 xmlGenericError(xmlGenericErrorContext,
10135 "PP: try PROLOG\n");break;
10136 case XML_PARSER_START_TAG:
10137 xmlGenericError(xmlGenericErrorContext,
10138 "PP: try START_TAG\n");break;
10139 case XML_PARSER_CONTENT:
10140 xmlGenericError(xmlGenericErrorContext,
10141 "PP: try CONTENT\n");break;
10142 case XML_PARSER_CDATA_SECTION:
10143 xmlGenericError(xmlGenericErrorContext,
10144 "PP: try CDATA_SECTION\n");break;
10145 case XML_PARSER_END_TAG:
10146 xmlGenericError(xmlGenericErrorContext,
10147 "PP: try END_TAG\n");break;
10148 case XML_PARSER_ENTITY_DECL:
10149 xmlGenericError(xmlGenericErrorContext,
10150 "PP: try ENTITY_DECL\n");break;
10151 case XML_PARSER_ENTITY_VALUE:
10152 xmlGenericError(xmlGenericErrorContext,
10153 "PP: try ENTITY_VALUE\n");break;
10154 case XML_PARSER_ATTRIBUTE_VALUE:
10155 xmlGenericError(xmlGenericErrorContext,
10156 "PP: try ATTRIBUTE_VALUE\n");break;
10157 case XML_PARSER_DTD:
10158 xmlGenericError(xmlGenericErrorContext,
10159 "PP: try DTD\n");break;
10160 case XML_PARSER_EPILOG:
10161 xmlGenericError(xmlGenericErrorContext,
10162 "PP: try EPILOG\n");break;
10163 case XML_PARSER_PI:
10164 xmlGenericError(xmlGenericErrorContext,
10165 "PP: try PI\n");break;
10166 case XML_PARSER_IGNORE:
10167 xmlGenericError(xmlGenericErrorContext,
10168 "PP: try IGNORE\n");break;
10169 }
10170#endif
10171
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010172 if ((ctxt->input != NULL) &&
10173 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010174 xmlSHRINK(ctxt);
10175 ctxt->checkIndex = 0;
10176 }
10177 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010178
Daniel Veillarda880b122003-04-21 21:36:41 +000010179 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010180 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010181 return(0);
10182
10183
Owen Taylor3473f882001-02-23 17:55:21 +000010184 /*
10185 * Pop-up of finished entities.
10186 */
10187 while ((RAW == 0) && (ctxt->inputNr > 1))
10188 xmlPopInput(ctxt);
10189
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010190 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010191 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010192 avail = ctxt->input->length -
10193 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010194 else {
10195 /*
10196 * If we are operating on converted input, try to flush
10197 * remainng chars to avoid them stalling in the non-converted
10198 * buffer.
10199 */
10200 if ((ctxt->input->buf->raw != NULL) &&
10201 (ctxt->input->buf->raw->use > 0)) {
10202 int base = ctxt->input->base -
10203 ctxt->input->buf->buffer->content;
10204 int current = ctxt->input->cur - ctxt->input->base;
10205
10206 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10207 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10208 ctxt->input->cur = ctxt->input->base + current;
10209 ctxt->input->end =
10210 &ctxt->input->buf->buffer->content[
10211 ctxt->input->buf->buffer->use];
10212 }
10213 avail = ctxt->input->buf->buffer->use -
10214 (ctxt->input->cur - ctxt->input->base);
10215 }
Owen Taylor3473f882001-02-23 17:55:21 +000010216 if (avail < 1)
10217 goto done;
10218 switch (ctxt->instate) {
10219 case XML_PARSER_EOF:
10220 /*
10221 * Document parsing is done !
10222 */
10223 goto done;
10224 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010225 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10226 xmlChar start[4];
10227 xmlCharEncoding enc;
10228
10229 /*
10230 * Very first chars read from the document flow.
10231 */
10232 if (avail < 4)
10233 goto done;
10234
10235 /*
10236 * Get the 4 first bytes and decode the charset
10237 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010238 * plug some encoding conversion routines,
10239 * else xmlSwitchEncoding will set to (default)
10240 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010241 */
10242 start[0] = RAW;
10243 start[1] = NXT(1);
10244 start[2] = NXT(2);
10245 start[3] = NXT(3);
10246 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010247 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010248 break;
10249 }
Owen Taylor3473f882001-02-23 17:55:21 +000010250
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010251 if (avail < 2)
10252 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010253 cur = ctxt->input->cur[0];
10254 next = ctxt->input->cur[1];
10255 if (cur == 0) {
10256 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10257 ctxt->sax->setDocumentLocator(ctxt->userData,
10258 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010259 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010260 ctxt->instate = XML_PARSER_EOF;
10261#ifdef DEBUG_PUSH
10262 xmlGenericError(xmlGenericErrorContext,
10263 "PP: entering EOF\n");
10264#endif
10265 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10266 ctxt->sax->endDocument(ctxt->userData);
10267 goto done;
10268 }
10269 if ((cur == '<') && (next == '?')) {
10270 /* PI or XML decl */
10271 if (avail < 5) return(ret);
10272 if ((!terminate) &&
10273 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10274 return(ret);
10275 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10276 ctxt->sax->setDocumentLocator(ctxt->userData,
10277 &xmlDefaultSAXLocator);
10278 if ((ctxt->input->cur[2] == 'x') &&
10279 (ctxt->input->cur[3] == 'm') &&
10280 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010281 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010282 ret += 5;
10283#ifdef DEBUG_PUSH
10284 xmlGenericError(xmlGenericErrorContext,
10285 "PP: Parsing XML Decl\n");
10286#endif
10287 xmlParseXMLDecl(ctxt);
10288 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10289 /*
10290 * The XML REC instructs us to stop parsing right
10291 * here
10292 */
10293 ctxt->instate = XML_PARSER_EOF;
10294 return(0);
10295 }
10296 ctxt->standalone = ctxt->input->standalone;
10297 if ((ctxt->encoding == NULL) &&
10298 (ctxt->input->encoding != NULL))
10299 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10300 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10301 (!ctxt->disableSAX))
10302 ctxt->sax->startDocument(ctxt->userData);
10303 ctxt->instate = XML_PARSER_MISC;
10304#ifdef DEBUG_PUSH
10305 xmlGenericError(xmlGenericErrorContext,
10306 "PP: entering MISC\n");
10307#endif
10308 } else {
10309 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10310 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10311 (!ctxt->disableSAX))
10312 ctxt->sax->startDocument(ctxt->userData);
10313 ctxt->instate = XML_PARSER_MISC;
10314#ifdef DEBUG_PUSH
10315 xmlGenericError(xmlGenericErrorContext,
10316 "PP: entering MISC\n");
10317#endif
10318 }
10319 } else {
10320 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10321 ctxt->sax->setDocumentLocator(ctxt->userData,
10322 &xmlDefaultSAXLocator);
10323 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010324 if (ctxt->version == NULL) {
10325 xmlErrMemory(ctxt, NULL);
10326 break;
10327 }
Owen Taylor3473f882001-02-23 17:55:21 +000010328 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10329 (!ctxt->disableSAX))
10330 ctxt->sax->startDocument(ctxt->userData);
10331 ctxt->instate = XML_PARSER_MISC;
10332#ifdef DEBUG_PUSH
10333 xmlGenericError(xmlGenericErrorContext,
10334 "PP: entering MISC\n");
10335#endif
10336 }
10337 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010338 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010339 const xmlChar *name;
10340 const xmlChar *prefix;
10341 const xmlChar *URI;
10342 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010343
10344 if ((avail < 2) && (ctxt->inputNr == 1))
10345 goto done;
10346 cur = ctxt->input->cur[0];
10347 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010348 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010349 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10351 ctxt->sax->endDocument(ctxt->userData);
10352 goto done;
10353 }
10354 if (!terminate) {
10355 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010356 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010357 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010358 goto done;
10359 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10360 goto done;
10361 }
10362 }
10363 if (ctxt->spaceNr == 0)
10364 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010365 else if (*ctxt->space == -2)
10366 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010367 else
10368 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010369#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010370 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010371#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010372 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010373#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010374 else
10375 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010376#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010377 if (name == NULL) {
10378 spacePop(ctxt);
10379 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010380 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10381 ctxt->sax->endDocument(ctxt->userData);
10382 goto done;
10383 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010384#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010385 /*
10386 * [ VC: Root Element Type ]
10387 * The Name in the document type declaration must match
10388 * the element type of the root element.
10389 */
10390 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10391 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10392 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010393#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010394
10395 /*
10396 * Check for an Empty Element.
10397 */
10398 if ((RAW == '/') && (NXT(1) == '>')) {
10399 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010400
10401 if (ctxt->sax2) {
10402 if ((ctxt->sax != NULL) &&
10403 (ctxt->sax->endElementNs != NULL) &&
10404 (!ctxt->disableSAX))
10405 ctxt->sax->endElementNs(ctxt->userData, name,
10406 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010407 if (ctxt->nsNr - nsNr > 0)
10408 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010409#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010410 } else {
10411 if ((ctxt->sax != NULL) &&
10412 (ctxt->sax->endElement != NULL) &&
10413 (!ctxt->disableSAX))
10414 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010415#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010416 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010417 spacePop(ctxt);
10418 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010419 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010420 } else {
10421 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010422 }
10423 break;
10424 }
10425 if (RAW == '>') {
10426 NEXT;
10427 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010428 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010429 "Couldn't find end of Start Tag %s\n",
10430 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010431 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010432 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010433 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010434 if (ctxt->sax2)
10435 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010436#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010437 else
10438 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010439#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010440
Daniel Veillarda880b122003-04-21 21:36:41 +000010441 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010442 break;
10443 }
10444 case XML_PARSER_CONTENT: {
10445 const xmlChar *test;
10446 unsigned int cons;
10447 if ((avail < 2) && (ctxt->inputNr == 1))
10448 goto done;
10449 cur = ctxt->input->cur[0];
10450 next = ctxt->input->cur[1];
10451
10452 test = CUR_PTR;
10453 cons = ctxt->input->consumed;
10454 if ((cur == '<') && (next == '/')) {
10455 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010456 break;
10457 } else if ((cur == '<') && (next == '?')) {
10458 if ((!terminate) &&
10459 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10460 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010461 xmlParsePI(ctxt);
10462 } else if ((cur == '<') && (next != '!')) {
10463 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010464 break;
10465 } else if ((cur == '<') && (next == '!') &&
10466 (ctxt->input->cur[2] == '-') &&
10467 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010468 int term;
10469
10470 if (avail < 4)
10471 goto done;
10472 ctxt->input->cur += 4;
10473 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10474 ctxt->input->cur -= 4;
10475 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010476 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010477 xmlParseComment(ctxt);
10478 ctxt->instate = XML_PARSER_CONTENT;
10479 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10480 (ctxt->input->cur[2] == '[') &&
10481 (ctxt->input->cur[3] == 'C') &&
10482 (ctxt->input->cur[4] == 'D') &&
10483 (ctxt->input->cur[5] == 'A') &&
10484 (ctxt->input->cur[6] == 'T') &&
10485 (ctxt->input->cur[7] == 'A') &&
10486 (ctxt->input->cur[8] == '[')) {
10487 SKIP(9);
10488 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010489 break;
10490 } else if ((cur == '<') && (next == '!') &&
10491 (avail < 9)) {
10492 goto done;
10493 } else if (cur == '&') {
10494 if ((!terminate) &&
10495 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10496 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010497 xmlParseReference(ctxt);
10498 } else {
10499 /* TODO Avoid the extra copy, handle directly !!! */
10500 /*
10501 * Goal of the following test is:
10502 * - minimize calls to the SAX 'character' callback
10503 * when they are mergeable
10504 * - handle an problem for isBlank when we only parse
10505 * a sequence of blank chars and the next one is
10506 * not available to check against '<' presence.
10507 * - tries to homogenize the differences in SAX
10508 * callbacks between the push and pull versions
10509 * of the parser.
10510 */
10511 if ((ctxt->inputNr == 1) &&
10512 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10513 if (!terminate) {
10514 if (ctxt->progressive) {
10515 if ((lastlt == NULL) ||
10516 (ctxt->input->cur > lastlt))
10517 goto done;
10518 } else if (xmlParseLookupSequence(ctxt,
10519 '<', 0, 0) < 0) {
10520 goto done;
10521 }
10522 }
10523 }
10524 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010525 xmlParseCharData(ctxt, 0);
10526 }
10527 /*
10528 * Pop-up of finished entities.
10529 */
10530 while ((RAW == 0) && (ctxt->inputNr > 1))
10531 xmlPopInput(ctxt);
10532 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10534 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010535 ctxt->instate = XML_PARSER_EOF;
10536 break;
10537 }
10538 break;
10539 }
10540 case XML_PARSER_END_TAG:
10541 if (avail < 2)
10542 goto done;
10543 if (!terminate) {
10544 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010545 /* > can be found unescaped in attribute values */
10546 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010547 goto done;
10548 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10549 goto done;
10550 }
10551 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010552 if (ctxt->sax2) {
10553 xmlParseEndTag2(ctxt,
10554 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10555 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010556 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010557 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010558 }
10559#ifdef LIBXML_SAX1_ENABLED
10560 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010561 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010562#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010563 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010564 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010565 } else {
10566 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010567 }
10568 break;
10569 case XML_PARSER_CDATA_SECTION: {
10570 /*
10571 * The Push mode need to have the SAX callback for
10572 * cdataBlock merge back contiguous callbacks.
10573 */
10574 int base;
10575
10576 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10577 if (base < 0) {
10578 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010579 int tmp;
10580
10581 tmp = xmlCheckCdataPush(ctxt->input->cur,
10582 XML_PARSER_BIG_BUFFER_SIZE);
10583 if (tmp < 0) {
10584 tmp = -tmp;
10585 ctxt->input->cur += tmp;
10586 goto encoding_error;
10587 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10589 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010590 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010591 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010592 else if (ctxt->sax->characters != NULL)
10593 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010594 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010595 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010596 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010597 ctxt->checkIndex = 0;
10598 }
10599 goto done;
10600 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010601 int tmp;
10602
10603 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10604 if ((tmp < 0) || (tmp != base)) {
10605 tmp = -tmp;
10606 ctxt->input->cur += tmp;
10607 goto encoding_error;
10608 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010609 if ((ctxt->sax != NULL) && (base == 0) &&
10610 (ctxt->sax->cdataBlock != NULL) &&
10611 (!ctxt->disableSAX)) {
10612 /*
10613 * Special case to provide identical behaviour
10614 * between pull and push parsers on enpty CDATA
10615 * sections
10616 */
10617 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10618 (!strncmp((const char *)&ctxt->input->cur[-9],
10619 "<![CDATA[", 9)))
10620 ctxt->sax->cdataBlock(ctxt->userData,
10621 BAD_CAST "", 0);
10622 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010623 (!ctxt->disableSAX)) {
10624 if (ctxt->sax->cdataBlock != NULL)
10625 ctxt->sax->cdataBlock(ctxt->userData,
10626 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010627 else if (ctxt->sax->characters != NULL)
10628 ctxt->sax->characters(ctxt->userData,
10629 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010630 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010631 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010632 ctxt->checkIndex = 0;
10633 ctxt->instate = XML_PARSER_CONTENT;
10634#ifdef DEBUG_PUSH
10635 xmlGenericError(xmlGenericErrorContext,
10636 "PP: entering CONTENT\n");
10637#endif
10638 }
10639 break;
10640 }
Owen Taylor3473f882001-02-23 17:55:21 +000010641 case XML_PARSER_MISC:
10642 SKIP_BLANKS;
10643 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010644 avail = ctxt->input->length -
10645 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010646 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010647 avail = ctxt->input->buf->buffer->use -
10648 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010649 if (avail < 2)
10650 goto done;
10651 cur = ctxt->input->cur[0];
10652 next = ctxt->input->cur[1];
10653 if ((cur == '<') && (next == '?')) {
10654 if ((!terminate) &&
10655 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10656 goto done;
10657#ifdef DEBUG_PUSH
10658 xmlGenericError(xmlGenericErrorContext,
10659 "PP: Parsing PI\n");
10660#endif
10661 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010662 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010663 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010664 (ctxt->input->cur[2] == '-') &&
10665 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010666 if ((!terminate) &&
10667 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10668 goto done;
10669#ifdef DEBUG_PUSH
10670 xmlGenericError(xmlGenericErrorContext,
10671 "PP: Parsing Comment\n");
10672#endif
10673 xmlParseComment(ctxt);
10674 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010675 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010676 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010677 (ctxt->input->cur[2] == 'D') &&
10678 (ctxt->input->cur[3] == 'O') &&
10679 (ctxt->input->cur[4] == 'C') &&
10680 (ctxt->input->cur[5] == 'T') &&
10681 (ctxt->input->cur[6] == 'Y') &&
10682 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010683 (ctxt->input->cur[8] == 'E')) {
10684 if ((!terminate) &&
10685 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10686 goto done;
10687#ifdef DEBUG_PUSH
10688 xmlGenericError(xmlGenericErrorContext,
10689 "PP: Parsing internal subset\n");
10690#endif
10691 ctxt->inSubset = 1;
10692 xmlParseDocTypeDecl(ctxt);
10693 if (RAW == '[') {
10694 ctxt->instate = XML_PARSER_DTD;
10695#ifdef DEBUG_PUSH
10696 xmlGenericError(xmlGenericErrorContext,
10697 "PP: entering DTD\n");
10698#endif
10699 } else {
10700 /*
10701 * Create and update the external subset.
10702 */
10703 ctxt->inSubset = 2;
10704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10705 (ctxt->sax->externalSubset != NULL))
10706 ctxt->sax->externalSubset(ctxt->userData,
10707 ctxt->intSubName, ctxt->extSubSystem,
10708 ctxt->extSubURI);
10709 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010710 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010711 ctxt->instate = XML_PARSER_PROLOG;
10712#ifdef DEBUG_PUSH
10713 xmlGenericError(xmlGenericErrorContext,
10714 "PP: entering PROLOG\n");
10715#endif
10716 }
10717 } else if ((cur == '<') && (next == '!') &&
10718 (avail < 9)) {
10719 goto done;
10720 } else {
10721 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010722 ctxt->progressive = 1;
10723 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010724#ifdef DEBUG_PUSH
10725 xmlGenericError(xmlGenericErrorContext,
10726 "PP: entering START_TAG\n");
10727#endif
10728 }
10729 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010730 case XML_PARSER_PROLOG:
10731 SKIP_BLANKS;
10732 if (ctxt->input->buf == NULL)
10733 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10734 else
10735 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10736 if (avail < 2)
10737 goto done;
10738 cur = ctxt->input->cur[0];
10739 next = ctxt->input->cur[1];
10740 if ((cur == '<') && (next == '?')) {
10741 if ((!terminate) &&
10742 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10743 goto done;
10744#ifdef DEBUG_PUSH
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: Parsing PI\n");
10747#endif
10748 xmlParsePI(ctxt);
10749 } else if ((cur == '<') && (next == '!') &&
10750 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10751 if ((!terminate) &&
10752 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10753 goto done;
10754#ifdef DEBUG_PUSH
10755 xmlGenericError(xmlGenericErrorContext,
10756 "PP: Parsing Comment\n");
10757#endif
10758 xmlParseComment(ctxt);
10759 ctxt->instate = XML_PARSER_PROLOG;
10760 } else if ((cur == '<') && (next == '!') &&
10761 (avail < 4)) {
10762 goto done;
10763 } else {
10764 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010765 if (ctxt->progressive == 0)
10766 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010767 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010768#ifdef DEBUG_PUSH
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: entering START_TAG\n");
10771#endif
10772 }
10773 break;
10774 case XML_PARSER_EPILOG:
10775 SKIP_BLANKS;
10776 if (ctxt->input->buf == NULL)
10777 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10778 else
10779 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10780 if (avail < 2)
10781 goto done;
10782 cur = ctxt->input->cur[0];
10783 next = ctxt->input->cur[1];
10784 if ((cur == '<') && (next == '?')) {
10785 if ((!terminate) &&
10786 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10787 goto done;
10788#ifdef DEBUG_PUSH
10789 xmlGenericError(xmlGenericErrorContext,
10790 "PP: Parsing PI\n");
10791#endif
10792 xmlParsePI(ctxt);
10793 ctxt->instate = XML_PARSER_EPILOG;
10794 } else if ((cur == '<') && (next == '!') &&
10795 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10796 if ((!terminate) &&
10797 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10798 goto done;
10799#ifdef DEBUG_PUSH
10800 xmlGenericError(xmlGenericErrorContext,
10801 "PP: Parsing Comment\n");
10802#endif
10803 xmlParseComment(ctxt);
10804 ctxt->instate = XML_PARSER_EPILOG;
10805 } else if ((cur == '<') && (next == '!') &&
10806 (avail < 4)) {
10807 goto done;
10808 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010810 ctxt->instate = XML_PARSER_EOF;
10811#ifdef DEBUG_PUSH
10812 xmlGenericError(xmlGenericErrorContext,
10813 "PP: entering EOF\n");
10814#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010816 ctxt->sax->endDocument(ctxt->userData);
10817 goto done;
10818 }
10819 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010820 case XML_PARSER_DTD: {
10821 /*
10822 * Sorry but progressive parsing of the internal subset
10823 * is not expected to be supported. We first check that
10824 * the full content of the internal subset is available and
10825 * the parsing is launched only at that point.
10826 * Internal subset ends up with "']' S? '>'" in an unescaped
10827 * section and not in a ']]>' sequence which are conditional
10828 * sections (whoever argued to keep that crap in XML deserve
10829 * a place in hell !).
10830 */
10831 int base, i;
10832 xmlChar *buf;
10833 xmlChar quote = 0;
10834
10835 base = ctxt->input->cur - ctxt->input->base;
10836 if (base < 0) return(0);
10837 if (ctxt->checkIndex > base)
10838 base = ctxt->checkIndex;
10839 buf = ctxt->input->buf->buffer->content;
10840 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10841 base++) {
10842 if (quote != 0) {
10843 if (buf[base] == quote)
10844 quote = 0;
10845 continue;
10846 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010847 if ((quote == 0) && (buf[base] == '<')) {
10848 int found = 0;
10849 /* special handling of comments */
10850 if (((unsigned int) base + 4 <
10851 ctxt->input->buf->buffer->use) &&
10852 (buf[base + 1] == '!') &&
10853 (buf[base + 2] == '-') &&
10854 (buf[base + 3] == '-')) {
10855 for (;(unsigned int) base + 3 <
10856 ctxt->input->buf->buffer->use; base++) {
10857 if ((buf[base] == '-') &&
10858 (buf[base + 1] == '-') &&
10859 (buf[base + 2] == '>')) {
10860 found = 1;
10861 base += 2;
10862 break;
10863 }
10864 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010865 if (!found) {
10866#if 0
10867 fprintf(stderr, "unfinished comment\n");
10868#endif
10869 break; /* for */
10870 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010871 continue;
10872 }
10873 }
Owen Taylor3473f882001-02-23 17:55:21 +000010874 if (buf[base] == '"') {
10875 quote = '"';
10876 continue;
10877 }
10878 if (buf[base] == '\'') {
10879 quote = '\'';
10880 continue;
10881 }
10882 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010883#if 0
10884 fprintf(stderr, "%c%c%c%c: ", buf[base],
10885 buf[base + 1], buf[base + 2], buf[base + 3]);
10886#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010887 if ((unsigned int) base +1 >=
10888 ctxt->input->buf->buffer->use)
10889 break;
10890 if (buf[base + 1] == ']') {
10891 /* conditional crap, skip both ']' ! */
10892 base++;
10893 continue;
10894 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010895 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010896 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10897 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010898 if (buf[base + i] == '>') {
10899#if 0
10900 fprintf(stderr, "found\n");
10901#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010902 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010903 }
10904 if (!IS_BLANK_CH(buf[base + i])) {
10905#if 0
10906 fprintf(stderr, "not found\n");
10907#endif
10908 goto not_end_of_int_subset;
10909 }
Owen Taylor3473f882001-02-23 17:55:21 +000010910 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010911#if 0
10912 fprintf(stderr, "end of stream\n");
10913#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010914 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010915
Owen Taylor3473f882001-02-23 17:55:21 +000010916 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010917not_end_of_int_subset:
10918 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010919 }
10920 /*
10921 * We didn't found the end of the Internal subset
10922 */
Owen Taylor3473f882001-02-23 17:55:21 +000010923#ifdef DEBUG_PUSH
10924 if (next == 0)
10925 xmlGenericError(xmlGenericErrorContext,
10926 "PP: lookup of int subset end filed\n");
10927#endif
10928 goto done;
10929
10930found_end_int_subset:
10931 xmlParseInternalSubset(ctxt);
10932 ctxt->inSubset = 2;
10933 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10934 (ctxt->sax->externalSubset != NULL))
10935 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10936 ctxt->extSubSystem, ctxt->extSubURI);
10937 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010938 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010939 ctxt->instate = XML_PARSER_PROLOG;
10940 ctxt->checkIndex = 0;
10941#ifdef DEBUG_PUSH
10942 xmlGenericError(xmlGenericErrorContext,
10943 "PP: entering PROLOG\n");
10944#endif
10945 break;
10946 }
10947 case XML_PARSER_COMMENT:
10948 xmlGenericError(xmlGenericErrorContext,
10949 "PP: internal error, state == COMMENT\n");
10950 ctxt->instate = XML_PARSER_CONTENT;
10951#ifdef DEBUG_PUSH
10952 xmlGenericError(xmlGenericErrorContext,
10953 "PP: entering CONTENT\n");
10954#endif
10955 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010956 case XML_PARSER_IGNORE:
10957 xmlGenericError(xmlGenericErrorContext,
10958 "PP: internal error, state == IGNORE");
10959 ctxt->instate = XML_PARSER_DTD;
10960#ifdef DEBUG_PUSH
10961 xmlGenericError(xmlGenericErrorContext,
10962 "PP: entering DTD\n");
10963#endif
10964 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010965 case XML_PARSER_PI:
10966 xmlGenericError(xmlGenericErrorContext,
10967 "PP: internal error, state == PI\n");
10968 ctxt->instate = XML_PARSER_CONTENT;
10969#ifdef DEBUG_PUSH
10970 xmlGenericError(xmlGenericErrorContext,
10971 "PP: entering CONTENT\n");
10972#endif
10973 break;
10974 case XML_PARSER_ENTITY_DECL:
10975 xmlGenericError(xmlGenericErrorContext,
10976 "PP: internal error, state == ENTITY_DECL\n");
10977 ctxt->instate = XML_PARSER_DTD;
10978#ifdef DEBUG_PUSH
10979 xmlGenericError(xmlGenericErrorContext,
10980 "PP: entering DTD\n");
10981#endif
10982 break;
10983 case XML_PARSER_ENTITY_VALUE:
10984 xmlGenericError(xmlGenericErrorContext,
10985 "PP: internal error, state == ENTITY_VALUE\n");
10986 ctxt->instate = XML_PARSER_CONTENT;
10987#ifdef DEBUG_PUSH
10988 xmlGenericError(xmlGenericErrorContext,
10989 "PP: entering DTD\n");
10990#endif
10991 break;
10992 case XML_PARSER_ATTRIBUTE_VALUE:
10993 xmlGenericError(xmlGenericErrorContext,
10994 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10995 ctxt->instate = XML_PARSER_START_TAG;
10996#ifdef DEBUG_PUSH
10997 xmlGenericError(xmlGenericErrorContext,
10998 "PP: entering START_TAG\n");
10999#endif
11000 break;
11001 case XML_PARSER_SYSTEM_LITERAL:
11002 xmlGenericError(xmlGenericErrorContext,
11003 "PP: internal error, state == SYSTEM_LITERAL\n");
11004 ctxt->instate = XML_PARSER_START_TAG;
11005#ifdef DEBUG_PUSH
11006 xmlGenericError(xmlGenericErrorContext,
11007 "PP: entering START_TAG\n");
11008#endif
11009 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011010 case XML_PARSER_PUBLIC_LITERAL:
11011 xmlGenericError(xmlGenericErrorContext,
11012 "PP: internal error, state == PUBLIC_LITERAL\n");
11013 ctxt->instate = XML_PARSER_START_TAG;
11014#ifdef DEBUG_PUSH
11015 xmlGenericError(xmlGenericErrorContext,
11016 "PP: entering START_TAG\n");
11017#endif
11018 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011019 }
11020 }
11021done:
11022#ifdef DEBUG_PUSH
11023 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11024#endif
11025 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011026encoding_error:
11027 {
11028 char buffer[150];
11029
11030 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11031 ctxt->input->cur[0], ctxt->input->cur[1],
11032 ctxt->input->cur[2], ctxt->input->cur[3]);
11033 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11034 "Input is not proper UTF-8, indicate encoding !\n%s",
11035 BAD_CAST buffer, NULL);
11036 }
11037 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011038}
11039
11040/**
Owen Taylor3473f882001-02-23 17:55:21 +000011041 * xmlParseChunk:
11042 * @ctxt: an XML parser context
11043 * @chunk: an char array
11044 * @size: the size in byte of the chunk
11045 * @terminate: last chunk indicator
11046 *
11047 * Parse a Chunk of memory
11048 *
11049 * Returns zero if no error, the xmlParserErrors otherwise.
11050 */
11051int
11052xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11053 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011054 int end_in_lf = 0;
11055
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011056 if (ctxt == NULL)
11057 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011058 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011059 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011060 if (ctxt->instate == XML_PARSER_START)
11061 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011062 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11063 (chunk[size - 1] == '\r')) {
11064 end_in_lf = 1;
11065 size--;
11066 }
Owen Taylor3473f882001-02-23 17:55:21 +000011067 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11068 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11069 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11070 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011071 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011072
William M. Bracka3215c72004-07-31 16:24:01 +000011073 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11074 if (res < 0) {
11075 ctxt->errNo = XML_PARSER_EOF;
11076 ctxt->disableSAX = 1;
11077 return (XML_PARSER_EOF);
11078 }
Owen Taylor3473f882001-02-23 17:55:21 +000011079 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11080 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011081 ctxt->input->end =
11082 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011083#ifdef DEBUG_PUSH
11084 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11085#endif
11086
Owen Taylor3473f882001-02-23 17:55:21 +000011087 } else if (ctxt->instate != XML_PARSER_EOF) {
11088 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11089 xmlParserInputBufferPtr in = ctxt->input->buf;
11090 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11091 (in->raw != NULL)) {
11092 int nbchars;
11093
11094 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11095 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011096 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011097 xmlGenericError(xmlGenericErrorContext,
11098 "xmlParseChunk: encoder error\n");
11099 return(XML_ERR_INVALID_ENCODING);
11100 }
11101 }
11102 }
11103 }
11104 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011105 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11106 (ctxt->input->buf != NULL)) {
11107 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11108 }
Daniel Veillard14412512005-01-21 23:53:26 +000011109 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011110 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011111 if (terminate) {
11112 /*
11113 * Check for termination
11114 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011115 int avail = 0;
11116
11117 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011118 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011119 avail = ctxt->input->length -
11120 (ctxt->input->cur - ctxt->input->base);
11121 else
11122 avail = ctxt->input->buf->buffer->use -
11123 (ctxt->input->cur - ctxt->input->base);
11124 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011125
Owen Taylor3473f882001-02-23 17:55:21 +000011126 if ((ctxt->instate != XML_PARSER_EOF) &&
11127 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011128 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011129 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011130 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011131 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011132 }
Owen Taylor3473f882001-02-23 17:55:21 +000011133 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011134 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011135 ctxt->sax->endDocument(ctxt->userData);
11136 }
11137 ctxt->instate = XML_PARSER_EOF;
11138 }
11139 return((xmlParserErrors) ctxt->errNo);
11140}
11141
11142/************************************************************************
11143 * *
11144 * I/O front end functions to the parser *
11145 * *
11146 ************************************************************************/
11147
11148/**
Owen Taylor3473f882001-02-23 17:55:21 +000011149 * xmlCreatePushParserCtxt:
11150 * @sax: a SAX handler
11151 * @user_data: The user data returned on SAX callbacks
11152 * @chunk: a pointer to an array of chars
11153 * @size: number of chars in the array
11154 * @filename: an optional file name or URI
11155 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011156 * Create a parser context for using the XML parser in push mode.
11157 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
11158 * the encoding. The remaining characters will be parsed so they
11159 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011160 * To allow content encoding detection, @size should be >= 4
11161 * The value of @filename is used for fetching external entities
11162 * and error/warning reports.
11163 *
11164 * Returns the new parser context or NULL
11165 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011166
Owen Taylor3473f882001-02-23 17:55:21 +000011167xmlParserCtxtPtr
11168xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11169 const char *chunk, int size, const char *filename) {
11170 xmlParserCtxtPtr ctxt;
11171 xmlParserInputPtr inputStream;
11172 xmlParserInputBufferPtr buf;
11173 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11174
11175 /*
11176 * plug some encoding conversion routines
11177 */
11178 if ((chunk != NULL) && (size >= 4))
11179 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11180
11181 buf = xmlAllocParserInputBuffer(enc);
11182 if (buf == NULL) return(NULL);
11183
11184 ctxt = xmlNewParserCtxt();
11185 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011186 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011187 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011188 return(NULL);
11189 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011190 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011191 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11192 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011193 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011194 xmlFreeParserInputBuffer(buf);
11195 xmlFreeParserCtxt(ctxt);
11196 return(NULL);
11197 }
Owen Taylor3473f882001-02-23 17:55:21 +000011198 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011199#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011200 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011201#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011202 xmlFree(ctxt->sax);
11203 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11204 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011205 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011206 xmlFreeParserInputBuffer(buf);
11207 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011208 return(NULL);
11209 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011210 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11211 if (sax->initialized == XML_SAX2_MAGIC)
11212 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11213 else
11214 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011215 if (user_data != NULL)
11216 ctxt->userData = user_data;
11217 }
11218 if (filename == NULL) {
11219 ctxt->directory = NULL;
11220 } else {
11221 ctxt->directory = xmlParserGetDirectory(filename);
11222 }
11223
11224 inputStream = xmlNewInputStream(ctxt);
11225 if (inputStream == NULL) {
11226 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011227 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011228 return(NULL);
11229 }
11230
11231 if (filename == NULL)
11232 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011233 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011234 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011235 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011236 if (inputStream->filename == NULL) {
11237 xmlFreeParserCtxt(ctxt);
11238 xmlFreeParserInputBuffer(buf);
11239 return(NULL);
11240 }
11241 }
Owen Taylor3473f882001-02-23 17:55:21 +000011242 inputStream->buf = buf;
11243 inputStream->base = inputStream->buf->buffer->content;
11244 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011245 inputStream->end =
11246 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011247
11248 inputPush(ctxt, inputStream);
11249
William M. Brack3a1cd212005-02-11 14:35:54 +000011250 /*
11251 * If the caller didn't provide an initial 'chunk' for determining
11252 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11253 * that it can be automatically determined later
11254 */
11255 if ((size == 0) || (chunk == NULL)) {
11256 ctxt->charset = XML_CHAR_ENCODING_NONE;
11257 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011258 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11259 int cur = ctxt->input->cur - ctxt->input->base;
11260
Owen Taylor3473f882001-02-23 17:55:21 +000011261 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011262
11263 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11264 ctxt->input->cur = ctxt->input->base + cur;
11265 ctxt->input->end =
11266 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011267#ifdef DEBUG_PUSH
11268 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11269#endif
11270 }
11271
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011272 if (enc != XML_CHAR_ENCODING_NONE) {
11273 xmlSwitchEncoding(ctxt, enc);
11274 }
11275
Owen Taylor3473f882001-02-23 17:55:21 +000011276 return(ctxt);
11277}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011278#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011279
11280/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011281 * xmlStopParser:
11282 * @ctxt: an XML parser context
11283 *
11284 * Blocks further parser processing
11285 */
11286void
11287xmlStopParser(xmlParserCtxtPtr ctxt) {
11288 if (ctxt == NULL)
11289 return;
11290 ctxt->instate = XML_PARSER_EOF;
11291 ctxt->disableSAX = 1;
11292 if (ctxt->input != NULL) {
11293 ctxt->input->cur = BAD_CAST"";
11294 ctxt->input->base = ctxt->input->cur;
11295 }
11296}
11297
11298/**
Owen Taylor3473f882001-02-23 17:55:21 +000011299 * xmlCreateIOParserCtxt:
11300 * @sax: a SAX handler
11301 * @user_data: The user data returned on SAX callbacks
11302 * @ioread: an I/O read function
11303 * @ioclose: an I/O close function
11304 * @ioctx: an I/O handler
11305 * @enc: the charset encoding if known
11306 *
11307 * Create a parser context for using the XML parser with an existing
11308 * I/O stream
11309 *
11310 * Returns the new parser context or NULL
11311 */
11312xmlParserCtxtPtr
11313xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11314 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11315 void *ioctx, xmlCharEncoding enc) {
11316 xmlParserCtxtPtr ctxt;
11317 xmlParserInputPtr inputStream;
11318 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011319
11320 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011321
11322 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11323 if (buf == NULL) return(NULL);
11324
11325 ctxt = xmlNewParserCtxt();
11326 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011327 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011328 return(NULL);
11329 }
11330 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011331#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011332 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011333#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011334 xmlFree(ctxt->sax);
11335 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11336 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011337 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011338 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011339 return(NULL);
11340 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011341 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11342 if (sax->initialized == XML_SAX2_MAGIC)
11343 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11344 else
11345 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011346 if (user_data != NULL)
11347 ctxt->userData = user_data;
11348 }
11349
11350 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11351 if (inputStream == NULL) {
11352 xmlFreeParserCtxt(ctxt);
11353 return(NULL);
11354 }
11355 inputPush(ctxt, inputStream);
11356
11357 return(ctxt);
11358}
11359
Daniel Veillard4432df22003-09-28 18:58:27 +000011360#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011361/************************************************************************
11362 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011363 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011364 * *
11365 ************************************************************************/
11366
11367/**
11368 * xmlIOParseDTD:
11369 * @sax: the SAX handler block or NULL
11370 * @input: an Input Buffer
11371 * @enc: the charset encoding if known
11372 *
11373 * Load and parse a DTD
11374 *
11375 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011376 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011377 */
11378
11379xmlDtdPtr
11380xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11381 xmlCharEncoding enc) {
11382 xmlDtdPtr ret = NULL;
11383 xmlParserCtxtPtr ctxt;
11384 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011385 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011386
11387 if (input == NULL)
11388 return(NULL);
11389
11390 ctxt = xmlNewParserCtxt();
11391 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011392 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011393 return(NULL);
11394 }
11395
11396 /*
11397 * Set-up the SAX context
11398 */
11399 if (sax != NULL) {
11400 if (ctxt->sax != NULL)
11401 xmlFree(ctxt->sax);
11402 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011403 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011404 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011405 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011406
11407 /*
11408 * generate a parser input from the I/O handler
11409 */
11410
Daniel Veillard43caefb2003-12-07 19:32:22 +000011411 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011412 if (pinput == NULL) {
11413 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011414 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011415 xmlFreeParserCtxt(ctxt);
11416 return(NULL);
11417 }
11418
11419 /*
11420 * plug some encoding conversion routines here.
11421 */
11422 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011423 if (enc != XML_CHAR_ENCODING_NONE) {
11424 xmlSwitchEncoding(ctxt, enc);
11425 }
Owen Taylor3473f882001-02-23 17:55:21 +000011426
11427 pinput->filename = NULL;
11428 pinput->line = 1;
11429 pinput->col = 1;
11430 pinput->base = ctxt->input->cur;
11431 pinput->cur = ctxt->input->cur;
11432 pinput->free = NULL;
11433
11434 /*
11435 * let's parse that entity knowing it's an external subset.
11436 */
11437 ctxt->inSubset = 2;
11438 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11439 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11440 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011441
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011442 if ((enc == XML_CHAR_ENCODING_NONE) &&
11443 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011444 /*
11445 * Get the 4 first bytes and decode the charset
11446 * if enc != XML_CHAR_ENCODING_NONE
11447 * plug some encoding conversion routines.
11448 */
11449 start[0] = RAW;
11450 start[1] = NXT(1);
11451 start[2] = NXT(2);
11452 start[3] = NXT(3);
11453 enc = xmlDetectCharEncoding(start, 4);
11454 if (enc != XML_CHAR_ENCODING_NONE) {
11455 xmlSwitchEncoding(ctxt, enc);
11456 }
11457 }
11458
Owen Taylor3473f882001-02-23 17:55:21 +000011459 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11460
11461 if (ctxt->myDoc != NULL) {
11462 if (ctxt->wellFormed) {
11463 ret = ctxt->myDoc->extSubset;
11464 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011465 if (ret != NULL) {
11466 xmlNodePtr tmp;
11467
11468 ret->doc = NULL;
11469 tmp = ret->children;
11470 while (tmp != NULL) {
11471 tmp->doc = NULL;
11472 tmp = tmp->next;
11473 }
11474 }
Owen Taylor3473f882001-02-23 17:55:21 +000011475 } else {
11476 ret = NULL;
11477 }
11478 xmlFreeDoc(ctxt->myDoc);
11479 ctxt->myDoc = NULL;
11480 }
11481 if (sax != NULL) ctxt->sax = NULL;
11482 xmlFreeParserCtxt(ctxt);
11483
11484 return(ret);
11485}
11486
11487/**
11488 * xmlSAXParseDTD:
11489 * @sax: the SAX handler block
11490 * @ExternalID: a NAME* containing the External ID of the DTD
11491 * @SystemID: a NAME* containing the URL to the DTD
11492 *
11493 * Load and parse an external subset.
11494 *
11495 * Returns the resulting xmlDtdPtr or NULL in case of error.
11496 */
11497
11498xmlDtdPtr
11499xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11500 const xmlChar *SystemID) {
11501 xmlDtdPtr ret = NULL;
11502 xmlParserCtxtPtr ctxt;
11503 xmlParserInputPtr input = NULL;
11504 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011505 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011506
11507 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11508
11509 ctxt = xmlNewParserCtxt();
11510 if (ctxt == NULL) {
11511 return(NULL);
11512 }
11513
11514 /*
11515 * Set-up the SAX context
11516 */
11517 if (sax != NULL) {
11518 if (ctxt->sax != NULL)
11519 xmlFree(ctxt->sax);
11520 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011521 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011522 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011523
11524 /*
11525 * Canonicalise the system ID
11526 */
11527 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011528 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011529 xmlFreeParserCtxt(ctxt);
11530 return(NULL);
11531 }
Owen Taylor3473f882001-02-23 17:55:21 +000011532
11533 /*
11534 * Ask the Entity resolver to load the damn thing
11535 */
11536
11537 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011538 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11539 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011540 if (input == NULL) {
11541 if (sax != NULL) ctxt->sax = NULL;
11542 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011543 if (systemIdCanonic != NULL)
11544 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011545 return(NULL);
11546 }
11547
11548 /*
11549 * plug some encoding conversion routines here.
11550 */
11551 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011552 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11553 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11554 xmlSwitchEncoding(ctxt, enc);
11555 }
Owen Taylor3473f882001-02-23 17:55:21 +000011556
11557 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011558 input->filename = (char *) systemIdCanonic;
11559 else
11560 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011561 input->line = 1;
11562 input->col = 1;
11563 input->base = ctxt->input->cur;
11564 input->cur = ctxt->input->cur;
11565 input->free = NULL;
11566
11567 /*
11568 * let's parse that entity knowing it's an external subset.
11569 */
11570 ctxt->inSubset = 2;
11571 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11572 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11573 ExternalID, SystemID);
11574 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11575
11576 if (ctxt->myDoc != NULL) {
11577 if (ctxt->wellFormed) {
11578 ret = ctxt->myDoc->extSubset;
11579 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011580 if (ret != NULL) {
11581 xmlNodePtr tmp;
11582
11583 ret->doc = NULL;
11584 tmp = ret->children;
11585 while (tmp != NULL) {
11586 tmp->doc = NULL;
11587 tmp = tmp->next;
11588 }
11589 }
Owen Taylor3473f882001-02-23 17:55:21 +000011590 } else {
11591 ret = NULL;
11592 }
11593 xmlFreeDoc(ctxt->myDoc);
11594 ctxt->myDoc = NULL;
11595 }
11596 if (sax != NULL) ctxt->sax = NULL;
11597 xmlFreeParserCtxt(ctxt);
11598
11599 return(ret);
11600}
11601
Daniel Veillard4432df22003-09-28 18:58:27 +000011602
Owen Taylor3473f882001-02-23 17:55:21 +000011603/**
11604 * xmlParseDTD:
11605 * @ExternalID: a NAME* containing the External ID of the DTD
11606 * @SystemID: a NAME* containing the URL to the DTD
11607 *
11608 * Load and parse an external subset.
11609 *
11610 * Returns the resulting xmlDtdPtr or NULL in case of error.
11611 */
11612
11613xmlDtdPtr
11614xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11615 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11616}
Daniel Veillard4432df22003-09-28 18:58:27 +000011617#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011618
11619/************************************************************************
11620 * *
11621 * Front ends when parsing an Entity *
11622 * *
11623 ************************************************************************/
11624
11625/**
Owen Taylor3473f882001-02-23 17:55:21 +000011626 * xmlParseCtxtExternalEntity:
11627 * @ctx: the existing parsing context
11628 * @URL: the URL for the entity to load
11629 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011630 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011631 *
11632 * Parse an external general entity within an existing parsing context
11633 * An external general parsed entity is well-formed if it matches the
11634 * production labeled extParsedEnt.
11635 *
11636 * [78] extParsedEnt ::= TextDecl? content
11637 *
11638 * Returns 0 if the entity is well formed, -1 in case of args problem and
11639 * the parser error code otherwise
11640 */
11641
11642int
11643xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011644 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011645 xmlParserCtxtPtr ctxt;
11646 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011647 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011648 xmlSAXHandlerPtr oldsax = NULL;
11649 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011650 xmlChar start[4];
11651 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011652 xmlParserInputPtr inputStream;
11653 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011654
Daniel Veillardce682bc2004-11-05 17:22:25 +000011655 if (ctx == NULL) return(-1);
11656
Owen Taylor3473f882001-02-23 17:55:21 +000011657 if (ctx->depth > 40) {
11658 return(XML_ERR_ENTITY_LOOP);
11659 }
11660
Daniel Veillardcda96922001-08-21 10:56:31 +000011661 if (lst != NULL)
11662 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011663 if ((URL == NULL) && (ID == NULL))
11664 return(-1);
11665 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11666 return(-1);
11667
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011668 ctxt = xmlNewParserCtxt();
11669 if (ctxt == NULL) {
11670 return(-1);
11671 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011672
Owen Taylor3473f882001-02-23 17:55:21 +000011673 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011674 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011675
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011676 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11677 if (inputStream == NULL) {
11678 xmlFreeParserCtxt(ctxt);
11679 return(-1);
11680 }
11681
11682 inputPush(ctxt, inputStream);
11683
11684 if ((ctxt->directory == NULL) && (directory == NULL))
11685 directory = xmlParserGetDirectory((char *)URL);
11686 if ((ctxt->directory == NULL) && (directory != NULL))
11687 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011688
Owen Taylor3473f882001-02-23 17:55:21 +000011689 oldsax = ctxt->sax;
11690 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011691 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011692 newDoc = xmlNewDoc(BAD_CAST "1.0");
11693 if (newDoc == NULL) {
11694 xmlFreeParserCtxt(ctxt);
11695 return(-1);
11696 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011697 if (ctx->myDoc->dict) {
11698 newDoc->dict = ctx->myDoc->dict;
11699 xmlDictReference(newDoc->dict);
11700 }
Owen Taylor3473f882001-02-23 17:55:21 +000011701 if (ctx->myDoc != NULL) {
11702 newDoc->intSubset = ctx->myDoc->intSubset;
11703 newDoc->extSubset = ctx->myDoc->extSubset;
11704 }
11705 if (ctx->myDoc->URL != NULL) {
11706 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11707 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011708 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11709 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011710 ctxt->sax = oldsax;
11711 xmlFreeParserCtxt(ctxt);
11712 newDoc->intSubset = NULL;
11713 newDoc->extSubset = NULL;
11714 xmlFreeDoc(newDoc);
11715 return(-1);
11716 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011717 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011718 nodePush(ctxt, newDoc->children);
11719 if (ctx->myDoc == NULL) {
11720 ctxt->myDoc = newDoc;
11721 } else {
11722 ctxt->myDoc = ctx->myDoc;
11723 newDoc->children->doc = ctx->myDoc;
11724 }
11725
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011726 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000011727 * Get the 4 first bytes and decode the charset
11728 * if enc != XML_CHAR_ENCODING_NONE
11729 * plug some encoding conversion routines.
11730 */
11731 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011732 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11733 start[0] = RAW;
11734 start[1] = NXT(1);
11735 start[2] = NXT(2);
11736 start[3] = NXT(3);
11737 enc = xmlDetectCharEncoding(start, 4);
11738 if (enc != XML_CHAR_ENCODING_NONE) {
11739 xmlSwitchEncoding(ctxt, enc);
11740 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011741 }
11742
Owen Taylor3473f882001-02-23 17:55:21 +000011743 /*
11744 * Parse a possible text declaration first
11745 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011746 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011747 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011748 /*
11749 * An XML-1.0 document can't reference an entity not XML-1.0
11750 */
11751 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
11752 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11753 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11754 "Version mismatch between document and entity\n");
11755 }
Owen Taylor3473f882001-02-23 17:55:21 +000011756 }
11757
11758 /*
11759 * Doing validity checking on chunk doesn't make sense
11760 */
11761 ctxt->instate = XML_PARSER_CONTENT;
11762 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011763 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011764 ctxt->loadsubset = ctx->loadsubset;
11765 ctxt->depth = ctx->depth + 1;
11766 ctxt->replaceEntities = ctx->replaceEntities;
11767 if (ctxt->validate) {
11768 ctxt->vctxt.error = ctx->vctxt.error;
11769 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011770 } else {
11771 ctxt->vctxt.error = NULL;
11772 ctxt->vctxt.warning = NULL;
11773 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011774 ctxt->vctxt.nodeTab = NULL;
11775 ctxt->vctxt.nodeNr = 0;
11776 ctxt->vctxt.nodeMax = 0;
11777 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011778 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11779 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011780 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11781 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11782 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011783 ctxt->dictNames = ctx->dictNames;
11784 ctxt->attsDefault = ctx->attsDefault;
11785 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011786 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011787
11788 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011789
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011790 ctx->validate = ctxt->validate;
11791 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011792 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011793 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011794 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011795 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011796 }
11797 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011798 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011799 }
11800
11801 if (!ctxt->wellFormed) {
11802 if (ctxt->errNo == 0)
11803 ret = 1;
11804 else
11805 ret = ctxt->errNo;
11806 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011807 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011808 xmlNodePtr cur;
11809
11810 /*
11811 * Return the newly created nodeset after unlinking it from
11812 * they pseudo parent.
11813 */
11814 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011815 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011816 while (cur != NULL) {
11817 cur->parent = NULL;
11818 cur = cur->next;
11819 }
11820 newDoc->children->children = NULL;
11821 }
11822 ret = 0;
11823 }
11824 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011825 ctxt->dict = NULL;
11826 ctxt->attsDefault = NULL;
11827 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011828 xmlFreeParserCtxt(ctxt);
11829 newDoc->intSubset = NULL;
11830 newDoc->extSubset = NULL;
11831 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011832
Owen Taylor3473f882001-02-23 17:55:21 +000011833 return(ret);
11834}
11835
11836/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011837 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011838 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011839 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011840 * @sax: the SAX handler bloc (possibly NULL)
11841 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11842 * @depth: Used for loop detection, use 0
11843 * @URL: the URL for the entity to load
11844 * @ID: the System ID for the entity to load
11845 * @list: the return value for the set of parsed nodes
11846 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011847 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011848 *
11849 * Returns 0 if the entity is well formed, -1 in case of args problem and
11850 * the parser error code otherwise
11851 */
11852
Daniel Veillard7d515752003-09-26 19:12:37 +000011853static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011854xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11855 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011856 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011857 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011858 xmlParserCtxtPtr ctxt;
11859 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011860 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011861 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011862 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011863 xmlChar start[4];
11864 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011865
11866 if (depth > 40) {
11867 return(XML_ERR_ENTITY_LOOP);
11868 }
11869
11870
11871
11872 if (list != NULL)
11873 *list = NULL;
11874 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011875 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011876 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011877 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011878
11879
11880 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011881 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011882 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011883 if (oldctxt != NULL) {
11884 ctxt->_private = oldctxt->_private;
11885 ctxt->loadsubset = oldctxt->loadsubset;
11886 ctxt->validate = oldctxt->validate;
11887 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011888 ctxt->record_info = oldctxt->record_info;
11889 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11890 ctxt->node_seq.length = oldctxt->node_seq.length;
11891 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011892 } else {
11893 /*
11894 * Doing validity checking on chunk without context
11895 * doesn't make sense
11896 */
11897 ctxt->_private = NULL;
11898 ctxt->validate = 0;
11899 ctxt->external = 2;
11900 ctxt->loadsubset = 0;
11901 }
Owen Taylor3473f882001-02-23 17:55:21 +000011902 if (sax != NULL) {
11903 oldsax = ctxt->sax;
11904 ctxt->sax = sax;
11905 if (user_data != NULL)
11906 ctxt->userData = user_data;
11907 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011908 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011909 newDoc = xmlNewDoc(BAD_CAST "1.0");
11910 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011911 ctxt->node_seq.maximum = 0;
11912 ctxt->node_seq.length = 0;
11913 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011914 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011915 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011916 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011917 newDoc->intSubset = doc->intSubset;
11918 newDoc->extSubset = doc->extSubset;
11919 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011920 xmlDictReference(newDoc->dict);
11921
Owen Taylor3473f882001-02-23 17:55:21 +000011922 if (doc->URL != NULL) {
11923 newDoc->URL = xmlStrdup(doc->URL);
11924 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011925 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11926 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011927 if (sax != NULL)
11928 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011929 ctxt->node_seq.maximum = 0;
11930 ctxt->node_seq.length = 0;
11931 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011932 xmlFreeParserCtxt(ctxt);
11933 newDoc->intSubset = NULL;
11934 newDoc->extSubset = NULL;
11935 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011936 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011937 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011938 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011939 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011940 ctxt->myDoc = doc;
11941 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011942
Daniel Veillard87a764e2001-06-20 17:41:10 +000011943 /*
11944 * Get the 4 first bytes and decode the charset
11945 * if enc != XML_CHAR_ENCODING_NONE
11946 * plug some encoding conversion routines.
11947 */
11948 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011949 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11950 start[0] = RAW;
11951 start[1] = NXT(1);
11952 start[2] = NXT(2);
11953 start[3] = NXT(3);
11954 enc = xmlDetectCharEncoding(start, 4);
11955 if (enc != XML_CHAR_ENCODING_NONE) {
11956 xmlSwitchEncoding(ctxt, enc);
11957 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011958 }
11959
Owen Taylor3473f882001-02-23 17:55:21 +000011960 /*
11961 * Parse a possible text declaration first
11962 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011963 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011964 xmlParseTextDecl(ctxt);
11965 }
11966
Owen Taylor3473f882001-02-23 17:55:21 +000011967 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011968 ctxt->depth = depth;
11969
11970 xmlParseContent(ctxt);
11971
Daniel Veillard561b7f82002-03-20 21:55:57 +000011972 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011973 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011974 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011975 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011976 }
11977 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011978 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011979 }
11980
11981 if (!ctxt->wellFormed) {
11982 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011983 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011984 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011985 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011986 } else {
11987 if (list != NULL) {
11988 xmlNodePtr cur;
11989
11990 /*
11991 * Return the newly created nodeset after unlinking it from
11992 * they pseudo parent.
11993 */
11994 cur = newDoc->children->children;
11995 *list = cur;
11996 while (cur != NULL) {
11997 cur->parent = NULL;
11998 cur = cur->next;
11999 }
12000 newDoc->children->children = NULL;
12001 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012002 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012003 }
12004 if (sax != NULL)
12005 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012006 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12007 oldctxt->node_seq.length = ctxt->node_seq.length;
12008 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012009 ctxt->node_seq.maximum = 0;
12010 ctxt->node_seq.length = 0;
12011 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012012 xmlFreeParserCtxt(ctxt);
12013 newDoc->intSubset = NULL;
12014 newDoc->extSubset = NULL;
12015 xmlFreeDoc(newDoc);
12016
12017 return(ret);
12018}
12019
Daniel Veillard81273902003-09-30 00:43:48 +000012020#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012021/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012022 * xmlParseExternalEntity:
12023 * @doc: the document the chunk pertains to
12024 * @sax: the SAX handler bloc (possibly NULL)
12025 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12026 * @depth: Used for loop detection, use 0
12027 * @URL: the URL for the entity to load
12028 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012029 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012030 *
12031 * Parse an external general entity
12032 * An external general parsed entity is well-formed if it matches the
12033 * production labeled extParsedEnt.
12034 *
12035 * [78] extParsedEnt ::= TextDecl? content
12036 *
12037 * Returns 0 if the entity is well formed, -1 in case of args problem and
12038 * the parser error code otherwise
12039 */
12040
12041int
12042xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012043 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012044 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012045 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012046}
12047
12048/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012049 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012050 * @doc: the document the chunk pertains to
12051 * @sax: the SAX handler bloc (possibly NULL)
12052 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12053 * @depth: Used for loop detection, use 0
12054 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012055 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012056 *
12057 * Parse a well-balanced chunk of an XML document
12058 * called by the parser
12059 * The allowed sequence for the Well Balanced Chunk is the one defined by
12060 * the content production in the XML grammar:
12061 *
12062 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12063 *
12064 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12065 * the parser error code otherwise
12066 */
12067
12068int
12069xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012070 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012071 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12072 depth, string, lst, 0 );
12073}
Daniel Veillard81273902003-09-30 00:43:48 +000012074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012075
12076/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012077 * xmlParseBalancedChunkMemoryInternal:
12078 * @oldctxt: the existing parsing context
12079 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12080 * @user_data: the user data field for the parser context
12081 * @lst: the return value for the set of parsed nodes
12082 *
12083 *
12084 * Parse a well-balanced chunk of an XML document
12085 * called by the parser
12086 * The allowed sequence for the Well Balanced Chunk is the one defined by
12087 * the content production in the XML grammar:
12088 *
12089 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12090 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012091 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12092 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000012093 *
12094 * In case recover is set to 1, the nodelist will not be empty even if
12095 * the parsed chunk is not well balanced.
12096 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012097static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012098xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12099 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12100 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012101 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012102 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012103 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012104 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012105 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012106 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012107 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012108
12109 if (oldctxt->depth > 40) {
12110 return(XML_ERR_ENTITY_LOOP);
12111 }
12112
12113
12114 if (lst != NULL)
12115 *lst = NULL;
12116 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012117 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012118
12119 size = xmlStrlen(string);
12120
12121 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012122 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012123 if (user_data != NULL)
12124 ctxt->userData = user_data;
12125 else
12126 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012127 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12128 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012129 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12130 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12131 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012132
12133 oldsax = ctxt->sax;
12134 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012135 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012136 ctxt->replaceEntities = oldctxt->replaceEntities;
12137 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012138
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012139 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012140 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012141 newDoc = xmlNewDoc(BAD_CAST "1.0");
12142 if (newDoc == NULL) {
12143 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012144 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012145 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012146 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012147 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012148 newDoc->dict = ctxt->dict;
12149 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012150 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012151 } else {
12152 ctxt->myDoc = oldctxt->myDoc;
12153 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012154 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012155 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012156 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12157 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012158 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012159 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012160 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012161 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012162 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012163 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012164 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012165 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012166 ctxt->myDoc->children = NULL;
12167 ctxt->myDoc->last = NULL;
12168 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012169 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012170 ctxt->instate = XML_PARSER_CONTENT;
12171 ctxt->depth = oldctxt->depth + 1;
12172
Daniel Veillard328f48c2002-11-15 15:24:34 +000012173 ctxt->validate = 0;
12174 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012175 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12176 /*
12177 * ID/IDREF registration will be done in xmlValidateElement below
12178 */
12179 ctxt->loadsubset |= XML_SKIP_IDS;
12180 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012181 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012182 ctxt->attsDefault = oldctxt->attsDefault;
12183 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012184
Daniel Veillard68e9e742002-11-16 15:35:11 +000012185 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012186 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012187 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012188 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012189 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012190 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012191 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012193 }
12194
12195 if (!ctxt->wellFormed) {
12196 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012197 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012198 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012199 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012200 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012201 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012202 }
12203
William M. Brack7b9154b2003-09-27 19:23:50 +000012204 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012205 xmlNodePtr cur;
12206
12207 /*
12208 * Return the newly created nodeset after unlinking it from
12209 * they pseudo parent.
12210 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012211 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012212 *lst = cur;
12213 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012214#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012215 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12216 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12217 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012218 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12219 oldctxt->myDoc, cur);
12220 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012221#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012222 cur->parent = NULL;
12223 cur = cur->next;
12224 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012225 ctxt->myDoc->children->children = NULL;
12226 }
12227 if (ctxt->myDoc != NULL) {
12228 xmlFreeNode(ctxt->myDoc->children);
12229 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012230 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012231 }
12232
12233 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012234 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012235 ctxt->attsDefault = NULL;
12236 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012237 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012238 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012239 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012240 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012241
12242 return(ret);
12243}
12244
Daniel Veillard29b17482004-08-16 00:39:03 +000012245/**
12246 * xmlParseInNodeContext:
12247 * @node: the context node
12248 * @data: the input string
12249 * @datalen: the input string length in bytes
12250 * @options: a combination of xmlParserOption
12251 * @lst: the return value for the set of parsed nodes
12252 *
12253 * Parse a well-balanced chunk of an XML document
12254 * within the context (DTD, namespaces, etc ...) of the given node.
12255 *
12256 * The allowed sequence for the data is a Well Balanced Chunk defined by
12257 * the content production in the XML grammar:
12258 *
12259 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12260 *
12261 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12262 * error code otherwise
12263 */
12264xmlParserErrors
12265xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12266 int options, xmlNodePtr *lst) {
12267#ifdef SAX2
12268 xmlParserCtxtPtr ctxt;
12269 xmlDocPtr doc = NULL;
12270 xmlNodePtr fake, cur;
12271 int nsnr = 0;
12272
12273 xmlParserErrors ret = XML_ERR_OK;
12274
12275 /*
12276 * check all input parameters, grab the document
12277 */
12278 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12279 return(XML_ERR_INTERNAL_ERROR);
12280 switch (node->type) {
12281 case XML_ELEMENT_NODE:
12282 case XML_ATTRIBUTE_NODE:
12283 case XML_TEXT_NODE:
12284 case XML_CDATA_SECTION_NODE:
12285 case XML_ENTITY_REF_NODE:
12286 case XML_PI_NODE:
12287 case XML_COMMENT_NODE:
12288 case XML_DOCUMENT_NODE:
12289 case XML_HTML_DOCUMENT_NODE:
12290 break;
12291 default:
12292 return(XML_ERR_INTERNAL_ERROR);
12293
12294 }
12295 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12296 (node->type != XML_DOCUMENT_NODE) &&
12297 (node->type != XML_HTML_DOCUMENT_NODE))
12298 node = node->parent;
12299 if (node == NULL)
12300 return(XML_ERR_INTERNAL_ERROR);
12301 if (node->type == XML_ELEMENT_NODE)
12302 doc = node->doc;
12303 else
12304 doc = (xmlDocPtr) node;
12305 if (doc == NULL)
12306 return(XML_ERR_INTERNAL_ERROR);
12307
12308 /*
12309 * allocate a context and set-up everything not related to the
12310 * node position in the tree
12311 */
12312 if (doc->type == XML_DOCUMENT_NODE)
12313 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12314#ifdef LIBXML_HTML_ENABLED
12315 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12316 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12317#endif
12318 else
12319 return(XML_ERR_INTERNAL_ERROR);
12320
12321 if (ctxt == NULL)
12322 return(XML_ERR_NO_MEMORY);
12323 fake = xmlNewComment(NULL);
12324 if (fake == NULL) {
12325 xmlFreeParserCtxt(ctxt);
12326 return(XML_ERR_NO_MEMORY);
12327 }
12328 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012329
12330 /*
12331 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12332 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12333 * we must wait until the last moment to free the original one.
12334 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012335 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012336 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012337 xmlDictFree(ctxt->dict);
12338 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012339 } else
12340 options |= XML_PARSE_NODICT;
12341
12342 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012343 xmlDetectSAX2(ctxt);
12344 ctxt->myDoc = doc;
12345
12346 if (node->type == XML_ELEMENT_NODE) {
12347 nodePush(ctxt, node);
12348 /*
12349 * initialize the SAX2 namespaces stack
12350 */
12351 cur = node;
12352 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12353 xmlNsPtr ns = cur->nsDef;
12354 const xmlChar *iprefix, *ihref;
12355
12356 while (ns != NULL) {
12357 if (ctxt->dict) {
12358 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12359 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12360 } else {
12361 iprefix = ns->prefix;
12362 ihref = ns->href;
12363 }
12364
12365 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12366 nsPush(ctxt, iprefix, ihref);
12367 nsnr++;
12368 }
12369 ns = ns->next;
12370 }
12371 cur = cur->parent;
12372 }
12373 ctxt->instate = XML_PARSER_CONTENT;
12374 }
12375
12376 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12377 /*
12378 * ID/IDREF registration will be done in xmlValidateElement below
12379 */
12380 ctxt->loadsubset |= XML_SKIP_IDS;
12381 }
12382
Daniel Veillard499cc922006-01-18 17:22:35 +000012383#ifdef LIBXML_HTML_ENABLED
12384 if (doc->type == XML_HTML_DOCUMENT_NODE)
12385 __htmlParseContent(ctxt);
12386 else
12387#endif
12388 xmlParseContent(ctxt);
12389
Daniel Veillard29b17482004-08-16 00:39:03 +000012390 nsPop(ctxt, nsnr);
12391 if ((RAW == '<') && (NXT(1) == '/')) {
12392 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12393 } else if (RAW != 0) {
12394 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12395 }
12396 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12397 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12398 ctxt->wellFormed = 0;
12399 }
12400
12401 if (!ctxt->wellFormed) {
12402 if (ctxt->errNo == 0)
12403 ret = XML_ERR_INTERNAL_ERROR;
12404 else
12405 ret = (xmlParserErrors)ctxt->errNo;
12406 } else {
12407 ret = XML_ERR_OK;
12408 }
12409
12410 /*
12411 * Return the newly created nodeset after unlinking it from
12412 * the pseudo sibling.
12413 */
12414
12415 cur = fake->next;
12416 fake->next = NULL;
12417 node->last = fake;
12418
12419 if (cur != NULL) {
12420 cur->prev = NULL;
12421 }
12422
12423 *lst = cur;
12424
12425 while (cur != NULL) {
12426 cur->parent = NULL;
12427 cur = cur->next;
12428 }
12429
12430 xmlUnlinkNode(fake);
12431 xmlFreeNode(fake);
12432
12433
12434 if (ret != XML_ERR_OK) {
12435 xmlFreeNodeList(*lst);
12436 *lst = NULL;
12437 }
William M. Brackc3f81342004-10-03 01:22:44 +000012438
William M. Brackb7b54de2004-10-06 16:38:01 +000012439 if (doc->dict != NULL)
12440 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012441 xmlFreeParserCtxt(ctxt);
12442
12443 return(ret);
12444#else /* !SAX2 */
12445 return(XML_ERR_INTERNAL_ERROR);
12446#endif
12447}
12448
Daniel Veillard81273902003-09-30 00:43:48 +000012449#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012450/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012451 * xmlParseBalancedChunkMemoryRecover:
12452 * @doc: the document the chunk pertains to
12453 * @sax: the SAX handler bloc (possibly NULL)
12454 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12455 * @depth: Used for loop detection, use 0
12456 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12457 * @lst: the return value for the set of parsed nodes
12458 * @recover: return nodes even if the data is broken (use 0)
12459 *
12460 *
12461 * Parse a well-balanced chunk of an XML document
12462 * called by the parser
12463 * The allowed sequence for the Well Balanced Chunk is the one defined by
12464 * the content production in the XML grammar:
12465 *
12466 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12467 *
12468 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12469 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012470 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012471 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012472 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12473 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012474 */
12475int
12476xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012477 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012478 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012479 xmlParserCtxtPtr ctxt;
12480 xmlDocPtr newDoc;
12481 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012482 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012483 int size;
12484 int ret = 0;
12485
12486 if (depth > 40) {
12487 return(XML_ERR_ENTITY_LOOP);
12488 }
12489
12490
Daniel Veillardcda96922001-08-21 10:56:31 +000012491 if (lst != NULL)
12492 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012493 if (string == NULL)
12494 return(-1);
12495
12496 size = xmlStrlen(string);
12497
12498 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12499 if (ctxt == NULL) return(-1);
12500 ctxt->userData = ctxt;
12501 if (sax != NULL) {
12502 oldsax = ctxt->sax;
12503 ctxt->sax = sax;
12504 if (user_data != NULL)
12505 ctxt->userData = user_data;
12506 }
12507 newDoc = xmlNewDoc(BAD_CAST "1.0");
12508 if (newDoc == NULL) {
12509 xmlFreeParserCtxt(ctxt);
12510 return(-1);
12511 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012512 if ((doc != NULL) && (doc->dict != NULL)) {
12513 xmlDictFree(ctxt->dict);
12514 ctxt->dict = doc->dict;
12515 xmlDictReference(ctxt->dict);
12516 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12517 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12518 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12519 ctxt->dictNames = 1;
12520 } else {
12521 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12522 }
Owen Taylor3473f882001-02-23 17:55:21 +000012523 if (doc != NULL) {
12524 newDoc->intSubset = doc->intSubset;
12525 newDoc->extSubset = doc->extSubset;
12526 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012527 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12528 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012529 if (sax != NULL)
12530 ctxt->sax = oldsax;
12531 xmlFreeParserCtxt(ctxt);
12532 newDoc->intSubset = NULL;
12533 newDoc->extSubset = NULL;
12534 xmlFreeDoc(newDoc);
12535 return(-1);
12536 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012537 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12538 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012539 if (doc == NULL) {
12540 ctxt->myDoc = newDoc;
12541 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012542 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012543 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012544 /* Ensure that doc has XML spec namespace */
12545 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12546 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012547 }
12548 ctxt->instate = XML_PARSER_CONTENT;
12549 ctxt->depth = depth;
12550
12551 /*
12552 * Doing validity checking on chunk doesn't make sense
12553 */
12554 ctxt->validate = 0;
12555 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012556 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012557
Daniel Veillardb39bc392002-10-26 19:29:51 +000012558 if ( doc != NULL ){
12559 content = doc->children;
12560 doc->children = NULL;
12561 xmlParseContent(ctxt);
12562 doc->children = content;
12563 }
12564 else {
12565 xmlParseContent(ctxt);
12566 }
Owen Taylor3473f882001-02-23 17:55:21 +000012567 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012568 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012569 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012570 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012571 }
12572 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012573 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012574 }
12575
12576 if (!ctxt->wellFormed) {
12577 if (ctxt->errNo == 0)
12578 ret = 1;
12579 else
12580 ret = ctxt->errNo;
12581 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012582 ret = 0;
12583 }
12584
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012585 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12586 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012587
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012588 /*
12589 * Return the newly created nodeset after unlinking it from
12590 * they pseudo parent.
12591 */
12592 cur = newDoc->children->children;
12593 *lst = cur;
12594 while (cur != NULL) {
12595 xmlSetTreeDoc(cur, doc);
12596 cur->parent = NULL;
12597 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012598 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012599 newDoc->children->children = NULL;
12600 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012601
Owen Taylor3473f882001-02-23 17:55:21 +000012602 if (sax != NULL)
12603 ctxt->sax = oldsax;
12604 xmlFreeParserCtxt(ctxt);
12605 newDoc->intSubset = NULL;
12606 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012607 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012608 xmlFreeDoc(newDoc);
12609
12610 return(ret);
12611}
12612
12613/**
12614 * xmlSAXParseEntity:
12615 * @sax: the SAX handler block
12616 * @filename: the filename
12617 *
12618 * parse an XML external entity out of context and build a tree.
12619 * It use the given SAX function block to handle the parsing callback.
12620 * If sax is NULL, fallback to the default DOM tree building routines.
12621 *
12622 * [78] extParsedEnt ::= TextDecl? content
12623 *
12624 * This correspond to a "Well Balanced" chunk
12625 *
12626 * Returns the resulting document tree
12627 */
12628
12629xmlDocPtr
12630xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12631 xmlDocPtr ret;
12632 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012633
12634 ctxt = xmlCreateFileParserCtxt(filename);
12635 if (ctxt == NULL) {
12636 return(NULL);
12637 }
12638 if (sax != NULL) {
12639 if (ctxt->sax != NULL)
12640 xmlFree(ctxt->sax);
12641 ctxt->sax = sax;
12642 ctxt->userData = NULL;
12643 }
12644
Owen Taylor3473f882001-02-23 17:55:21 +000012645 xmlParseExtParsedEnt(ctxt);
12646
12647 if (ctxt->wellFormed)
12648 ret = ctxt->myDoc;
12649 else {
12650 ret = NULL;
12651 xmlFreeDoc(ctxt->myDoc);
12652 ctxt->myDoc = NULL;
12653 }
12654 if (sax != NULL)
12655 ctxt->sax = NULL;
12656 xmlFreeParserCtxt(ctxt);
12657
12658 return(ret);
12659}
12660
12661/**
12662 * xmlParseEntity:
12663 * @filename: the filename
12664 *
12665 * parse an XML external entity out of context and build a tree.
12666 *
12667 * [78] extParsedEnt ::= TextDecl? content
12668 *
12669 * This correspond to a "Well Balanced" chunk
12670 *
12671 * Returns the resulting document tree
12672 */
12673
12674xmlDocPtr
12675xmlParseEntity(const char *filename) {
12676 return(xmlSAXParseEntity(NULL, filename));
12677}
Daniel Veillard81273902003-09-30 00:43:48 +000012678#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012679
12680/**
12681 * xmlCreateEntityParserCtxt:
12682 * @URL: the entity URL
12683 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012684 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012685 *
12686 * Create a parser context for an external entity
12687 * Automatic support for ZLIB/Compress compressed document is provided
12688 * by default if found at compile-time.
12689 *
12690 * Returns the new parser context or NULL
12691 */
12692xmlParserCtxtPtr
12693xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12694 const xmlChar *base) {
12695 xmlParserCtxtPtr ctxt;
12696 xmlParserInputPtr inputStream;
12697 char *directory = NULL;
12698 xmlChar *uri;
12699
12700 ctxt = xmlNewParserCtxt();
12701 if (ctxt == NULL) {
12702 return(NULL);
12703 }
12704
12705 uri = xmlBuildURI(URL, base);
12706
12707 if (uri == NULL) {
12708 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12709 if (inputStream == NULL) {
12710 xmlFreeParserCtxt(ctxt);
12711 return(NULL);
12712 }
12713
12714 inputPush(ctxt, inputStream);
12715
12716 if ((ctxt->directory == NULL) && (directory == NULL))
12717 directory = xmlParserGetDirectory((char *)URL);
12718 if ((ctxt->directory == NULL) && (directory != NULL))
12719 ctxt->directory = directory;
12720 } else {
12721 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12722 if (inputStream == NULL) {
12723 xmlFree(uri);
12724 xmlFreeParserCtxt(ctxt);
12725 return(NULL);
12726 }
12727
12728 inputPush(ctxt, inputStream);
12729
12730 if ((ctxt->directory == NULL) && (directory == NULL))
12731 directory = xmlParserGetDirectory((char *)uri);
12732 if ((ctxt->directory == NULL) && (directory != NULL))
12733 ctxt->directory = directory;
12734 xmlFree(uri);
12735 }
Owen Taylor3473f882001-02-23 17:55:21 +000012736 return(ctxt);
12737}
12738
12739/************************************************************************
12740 * *
12741 * Front ends when parsing from a file *
12742 * *
12743 ************************************************************************/
12744
12745/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012746 * xmlCreateURLParserCtxt:
12747 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012748 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012749 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012750 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012751 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012752 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012753 *
12754 * Returns the new parser context or NULL
12755 */
12756xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012757xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012758{
12759 xmlParserCtxtPtr ctxt;
12760 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012761 char *directory = NULL;
12762
Owen Taylor3473f882001-02-23 17:55:21 +000012763 ctxt = xmlNewParserCtxt();
12764 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012765 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012766 return(NULL);
12767 }
12768
Daniel Veillarddf292f72005-01-16 19:00:15 +000012769 if (options)
12770 xmlCtxtUseOptions(ctxt, options);
12771 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012772
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012773 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012774 if (inputStream == NULL) {
12775 xmlFreeParserCtxt(ctxt);
12776 return(NULL);
12777 }
12778
Owen Taylor3473f882001-02-23 17:55:21 +000012779 inputPush(ctxt, inputStream);
12780 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012781 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012782 if ((ctxt->directory == NULL) && (directory != NULL))
12783 ctxt->directory = directory;
12784
12785 return(ctxt);
12786}
12787
Daniel Veillard61b93382003-11-03 14:28:31 +000012788/**
12789 * xmlCreateFileParserCtxt:
12790 * @filename: the filename
12791 *
12792 * Create a parser context for a file content.
12793 * Automatic support for ZLIB/Compress compressed document is provided
12794 * by default if found at compile-time.
12795 *
12796 * Returns the new parser context or NULL
12797 */
12798xmlParserCtxtPtr
12799xmlCreateFileParserCtxt(const char *filename)
12800{
12801 return(xmlCreateURLParserCtxt(filename, 0));
12802}
12803
Daniel Veillard81273902003-09-30 00:43:48 +000012804#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012805/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012806 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012807 * @sax: the SAX handler block
12808 * @filename: the filename
12809 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12810 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012811 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012812 *
12813 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12814 * compressed document is provided by default if found at compile-time.
12815 * It use the given SAX function block to handle the parsing callback.
12816 * If sax is NULL, fallback to the default DOM tree building routines.
12817 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012818 * User data (void *) is stored within the parser context in the
12819 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012820 *
Owen Taylor3473f882001-02-23 17:55:21 +000012821 * Returns the resulting document tree
12822 */
12823
12824xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012825xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12826 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012827 xmlDocPtr ret;
12828 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012829
Daniel Veillard635ef722001-10-29 11:48:19 +000012830 xmlInitParser();
12831
Owen Taylor3473f882001-02-23 17:55:21 +000012832 ctxt = xmlCreateFileParserCtxt(filename);
12833 if (ctxt == NULL) {
12834 return(NULL);
12835 }
12836 if (sax != NULL) {
12837 if (ctxt->sax != NULL)
12838 xmlFree(ctxt->sax);
12839 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012840 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012841 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012842 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012843 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012844 }
Owen Taylor3473f882001-02-23 17:55:21 +000012845
Daniel Veillard37d2d162008-03-14 10:54:00 +000012846 if (ctxt->directory == NULL)
12847 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012848
Daniel Veillarddad3f682002-11-17 16:47:27 +000012849 ctxt->recovery = recovery;
12850
Owen Taylor3473f882001-02-23 17:55:21 +000012851 xmlParseDocument(ctxt);
12852
William M. Brackc07329e2003-09-08 01:57:30 +000012853 if ((ctxt->wellFormed) || recovery) {
12854 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012855 if (ret != NULL) {
12856 if (ctxt->input->buf->compressed > 0)
12857 ret->compression = 9;
12858 else
12859 ret->compression = ctxt->input->buf->compressed;
12860 }
William M. Brackc07329e2003-09-08 01:57:30 +000012861 }
Owen Taylor3473f882001-02-23 17:55:21 +000012862 else {
12863 ret = NULL;
12864 xmlFreeDoc(ctxt->myDoc);
12865 ctxt->myDoc = NULL;
12866 }
12867 if (sax != NULL)
12868 ctxt->sax = NULL;
12869 xmlFreeParserCtxt(ctxt);
12870
12871 return(ret);
12872}
12873
12874/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012875 * xmlSAXParseFile:
12876 * @sax: the SAX handler block
12877 * @filename: the filename
12878 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12879 * documents
12880 *
12881 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12882 * compressed document is provided by default if found at compile-time.
12883 * It use the given SAX function block to handle the parsing callback.
12884 * If sax is NULL, fallback to the default DOM tree building routines.
12885 *
12886 * Returns the resulting document tree
12887 */
12888
12889xmlDocPtr
12890xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12891 int recovery) {
12892 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12893}
12894
12895/**
Owen Taylor3473f882001-02-23 17:55:21 +000012896 * xmlRecoverDoc:
12897 * @cur: a pointer to an array of xmlChar
12898 *
12899 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000012900 * In the case the document is not Well Formed, a attempt to build a
12901 * tree is tried anyway
12902 *
12903 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000012904 */
12905
12906xmlDocPtr
12907xmlRecoverDoc(xmlChar *cur) {
12908 return(xmlSAXParseDoc(NULL, cur, 1));
12909}
12910
12911/**
12912 * xmlParseFile:
12913 * @filename: the filename
12914 *
12915 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12916 * compressed document is provided by default if found at compile-time.
12917 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012918 * Returns the resulting document tree if the file was wellformed,
12919 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012920 */
12921
12922xmlDocPtr
12923xmlParseFile(const char *filename) {
12924 return(xmlSAXParseFile(NULL, filename, 0));
12925}
12926
12927/**
12928 * xmlRecoverFile:
12929 * @filename: the filename
12930 *
12931 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12932 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000012933 * In the case the document is not Well Formed, it attempts to build
12934 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000012935 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000012936 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000012937 */
12938
12939xmlDocPtr
12940xmlRecoverFile(const char *filename) {
12941 return(xmlSAXParseFile(NULL, filename, 1));
12942}
12943
12944
12945/**
12946 * xmlSetupParserForBuffer:
12947 * @ctxt: an XML parser context
12948 * @buffer: a xmlChar * buffer
12949 * @filename: a file name
12950 *
12951 * Setup the parser context to parse a new buffer; Clears any prior
12952 * contents from the parser context. The buffer parameter must not be
12953 * NULL, but the filename parameter can be
12954 */
12955void
12956xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12957 const char* filename)
12958{
12959 xmlParserInputPtr input;
12960
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012961 if ((ctxt == NULL) || (buffer == NULL))
12962 return;
12963
Owen Taylor3473f882001-02-23 17:55:21 +000012964 input = xmlNewInputStream(ctxt);
12965 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012966 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012967 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012968 return;
12969 }
12970
12971 xmlClearParserCtxt(ctxt);
12972 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012973 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012974 input->base = buffer;
12975 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012976 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012977 inputPush(ctxt, input);
12978}
12979
12980/**
12981 * xmlSAXUserParseFile:
12982 * @sax: a SAX handler
12983 * @user_data: The user data returned on SAX callbacks
12984 * @filename: a file name
12985 *
12986 * parse an XML file and call the given SAX handler routines.
12987 * Automatic support for ZLIB/Compress compressed document is provided
12988 *
12989 * Returns 0 in case of success or a error number otherwise
12990 */
12991int
12992xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12993 const char *filename) {
12994 int ret = 0;
12995 xmlParserCtxtPtr ctxt;
12996
12997 ctxt = xmlCreateFileParserCtxt(filename);
12998 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012999 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013000 xmlFree(ctxt->sax);
13001 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013002 xmlDetectSAX2(ctxt);
13003
Owen Taylor3473f882001-02-23 17:55:21 +000013004 if (user_data != NULL)
13005 ctxt->userData = user_data;
13006
13007 xmlParseDocument(ctxt);
13008
13009 if (ctxt->wellFormed)
13010 ret = 0;
13011 else {
13012 if (ctxt->errNo != 0)
13013 ret = ctxt->errNo;
13014 else
13015 ret = -1;
13016 }
13017 if (sax != NULL)
13018 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013019 if (ctxt->myDoc != NULL) {
13020 xmlFreeDoc(ctxt->myDoc);
13021 ctxt->myDoc = NULL;
13022 }
Owen Taylor3473f882001-02-23 17:55:21 +000013023 xmlFreeParserCtxt(ctxt);
13024
13025 return ret;
13026}
Daniel Veillard81273902003-09-30 00:43:48 +000013027#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013028
13029/************************************************************************
13030 * *
13031 * Front ends when parsing from memory *
13032 * *
13033 ************************************************************************/
13034
13035/**
13036 * xmlCreateMemoryParserCtxt:
13037 * @buffer: a pointer to a char array
13038 * @size: the size of the array
13039 *
13040 * Create a parser context for an XML in-memory document.
13041 *
13042 * Returns the new parser context or NULL
13043 */
13044xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013045xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013046 xmlParserCtxtPtr ctxt;
13047 xmlParserInputPtr input;
13048 xmlParserInputBufferPtr buf;
13049
13050 if (buffer == NULL)
13051 return(NULL);
13052 if (size <= 0)
13053 return(NULL);
13054
13055 ctxt = xmlNewParserCtxt();
13056 if (ctxt == NULL)
13057 return(NULL);
13058
Daniel Veillard53350552003-09-18 13:35:51 +000013059 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013060 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013061 if (buf == NULL) {
13062 xmlFreeParserCtxt(ctxt);
13063 return(NULL);
13064 }
Owen Taylor3473f882001-02-23 17:55:21 +000013065
13066 input = xmlNewInputStream(ctxt);
13067 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013068 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013069 xmlFreeParserCtxt(ctxt);
13070 return(NULL);
13071 }
13072
13073 input->filename = NULL;
13074 input->buf = buf;
13075 input->base = input->buf->buffer->content;
13076 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013077 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013078
13079 inputPush(ctxt, input);
13080 return(ctxt);
13081}
13082
Daniel Veillard81273902003-09-30 00:43:48 +000013083#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013084/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013085 * xmlSAXParseMemoryWithData:
13086 * @sax: the SAX handler block
13087 * @buffer: an pointer to a char array
13088 * @size: the size of the array
13089 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13090 * documents
13091 * @data: the userdata
13092 *
13093 * parse an XML in-memory block and use the given SAX function block
13094 * to handle the parsing callback. If sax is NULL, fallback to the default
13095 * DOM tree building routines.
13096 *
13097 * User data (void *) is stored within the parser context in the
13098 * context's _private member, so it is available nearly everywhere in libxml
13099 *
13100 * Returns the resulting document tree
13101 */
13102
13103xmlDocPtr
13104xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13105 int size, int recovery, void *data) {
13106 xmlDocPtr ret;
13107 xmlParserCtxtPtr ctxt;
13108
13109 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13110 if (ctxt == NULL) return(NULL);
13111 if (sax != NULL) {
13112 if (ctxt->sax != NULL)
13113 xmlFree(ctxt->sax);
13114 ctxt->sax = sax;
13115 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013116 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013117 if (data!=NULL) {
13118 ctxt->_private=data;
13119 }
13120
Daniel Veillardadba5f12003-04-04 16:09:01 +000013121 ctxt->recovery = recovery;
13122
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013123 xmlParseDocument(ctxt);
13124
13125 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13126 else {
13127 ret = NULL;
13128 xmlFreeDoc(ctxt->myDoc);
13129 ctxt->myDoc = NULL;
13130 }
13131 if (sax != NULL)
13132 ctxt->sax = NULL;
13133 xmlFreeParserCtxt(ctxt);
13134
13135 return(ret);
13136}
13137
13138/**
Owen Taylor3473f882001-02-23 17:55:21 +000013139 * xmlSAXParseMemory:
13140 * @sax: the SAX handler block
13141 * @buffer: an pointer to a char array
13142 * @size: the size of the array
13143 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13144 * documents
13145 *
13146 * parse an XML in-memory block and use the given SAX function block
13147 * to handle the parsing callback. If sax is NULL, fallback to the default
13148 * DOM tree building routines.
13149 *
13150 * Returns the resulting document tree
13151 */
13152xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013153xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13154 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013155 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013156}
13157
13158/**
13159 * xmlParseMemory:
13160 * @buffer: an pointer to a char array
13161 * @size: the size of the array
13162 *
13163 * parse an XML in-memory block and build a tree.
13164 *
13165 * Returns the resulting document tree
13166 */
13167
Daniel Veillard50822cb2001-07-26 20:05:51 +000013168xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013169 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13170}
13171
13172/**
13173 * xmlRecoverMemory:
13174 * @buffer: an pointer to a char array
13175 * @size: the size of the array
13176 *
13177 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013178 * In the case the document is not Well Formed, an attempt to
13179 * build a tree is tried anyway
13180 *
13181 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013182 */
13183
Daniel Veillard50822cb2001-07-26 20:05:51 +000013184xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013185 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13186}
13187
13188/**
13189 * xmlSAXUserParseMemory:
13190 * @sax: a SAX handler
13191 * @user_data: The user data returned on SAX callbacks
13192 * @buffer: an in-memory XML document input
13193 * @size: the length of the XML document in bytes
13194 *
13195 * A better SAX parsing routine.
13196 * parse an XML in-memory buffer and call the given SAX handler routines.
13197 *
13198 * Returns 0 in case of success or a error number otherwise
13199 */
13200int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013201 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013202 int ret = 0;
13203 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013204
13205 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13206 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013207 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13208 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013209 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013210 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013211
Daniel Veillard30211a02001-04-26 09:33:18 +000013212 if (user_data != NULL)
13213 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013214
13215 xmlParseDocument(ctxt);
13216
13217 if (ctxt->wellFormed)
13218 ret = 0;
13219 else {
13220 if (ctxt->errNo != 0)
13221 ret = ctxt->errNo;
13222 else
13223 ret = -1;
13224 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013225 if (sax != NULL)
13226 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013227 if (ctxt->myDoc != NULL) {
13228 xmlFreeDoc(ctxt->myDoc);
13229 ctxt->myDoc = NULL;
13230 }
Owen Taylor3473f882001-02-23 17:55:21 +000013231 xmlFreeParserCtxt(ctxt);
13232
13233 return ret;
13234}
Daniel Veillard81273902003-09-30 00:43:48 +000013235#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013236
13237/**
13238 * xmlCreateDocParserCtxt:
13239 * @cur: a pointer to an array of xmlChar
13240 *
13241 * Creates a parser context for an XML in-memory document.
13242 *
13243 * Returns the new parser context or NULL
13244 */
13245xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013247 int len;
13248
13249 if (cur == NULL)
13250 return(NULL);
13251 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013252 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013253}
13254
Daniel Veillard81273902003-09-30 00:43:48 +000013255#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013256/**
13257 * xmlSAXParseDoc:
13258 * @sax: the SAX handler block
13259 * @cur: a pointer to an array of xmlChar
13260 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13261 * documents
13262 *
13263 * parse an XML in-memory document and build a tree.
13264 * It use the given SAX function block to handle the parsing callback.
13265 * If sax is NULL, fallback to the default DOM tree building routines.
13266 *
13267 * Returns the resulting document tree
13268 */
13269
13270xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013271xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013272 xmlDocPtr ret;
13273 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013274 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013275
Daniel Veillard38936062004-11-04 17:45:11 +000013276 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013277
13278
13279 ctxt = xmlCreateDocParserCtxt(cur);
13280 if (ctxt == NULL) return(NULL);
13281 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013282 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013283 ctxt->sax = sax;
13284 ctxt->userData = NULL;
13285 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013286 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013287
13288 xmlParseDocument(ctxt);
13289 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13290 else {
13291 ret = NULL;
13292 xmlFreeDoc(ctxt->myDoc);
13293 ctxt->myDoc = NULL;
13294 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013295 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013296 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013297 xmlFreeParserCtxt(ctxt);
13298
13299 return(ret);
13300}
13301
13302/**
13303 * xmlParseDoc:
13304 * @cur: a pointer to an array of xmlChar
13305 *
13306 * parse an XML in-memory document and build a tree.
13307 *
13308 * Returns the resulting document tree
13309 */
13310
13311xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013312xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013313 return(xmlSAXParseDoc(NULL, cur, 0));
13314}
Daniel Veillard81273902003-09-30 00:43:48 +000013315#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013316
Daniel Veillard81273902003-09-30 00:43:48 +000013317#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013318/************************************************************************
13319 * *
13320 * Specific function to keep track of entities references *
13321 * and used by the XSLT debugger *
13322 * *
13323 ************************************************************************/
13324
13325static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13326
13327/**
13328 * xmlAddEntityReference:
13329 * @ent : A valid entity
13330 * @firstNode : A valid first node for children of entity
13331 * @lastNode : A valid last node of children entity
13332 *
13333 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13334 */
13335static void
13336xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13337 xmlNodePtr lastNode)
13338{
13339 if (xmlEntityRefFunc != NULL) {
13340 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13341 }
13342}
13343
13344
13345/**
13346 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013347 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013348 *
13349 * Set the function to call call back when a xml reference has been made
13350 */
13351void
13352xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13353{
13354 xmlEntityRefFunc = func;
13355}
Daniel Veillard81273902003-09-30 00:43:48 +000013356#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013357
13358/************************************************************************
13359 * *
13360 * Miscellaneous *
13361 * *
13362 ************************************************************************/
13363
13364#ifdef LIBXML_XPATH_ENABLED
13365#include <libxml/xpath.h>
13366#endif
13367
Daniel Veillardffa3c742005-07-21 13:24:09 +000013368extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013369static int xmlParserInitialized = 0;
13370
13371/**
13372 * xmlInitParser:
13373 *
13374 * Initialization function for the XML parser.
13375 * This is not reentrant. Call once before processing in case of
13376 * use in multithreaded programs.
13377 */
13378
13379void
13380xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013381 if (xmlParserInitialized != 0)
13382 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013383
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013384#ifdef LIBXML_THREAD_ENABLED
13385 __xmlGlobalInitMutexLock();
13386 if (xmlParserInitialized == 0) {
13387#endif
13388 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13389 (xmlGenericError == NULL))
13390 initGenericErrorDefaultFunc(NULL);
13391 xmlInitGlobals();
13392 xmlInitThreads();
13393 xmlInitMemory();
13394 xmlInitCharEncodingHandlers();
13395 xmlDefaultSAXHandlerInit();
13396 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013397#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013398 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013399#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013400#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013401 htmlInitAutoClose();
13402 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013403#endif
13404#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013405 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013406#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013407 xmlParserInitialized = 1;
13408#ifdef LIBXML_THREAD_ENABLED
13409 }
13410 __xmlGlobalInitMutexUnlock();
13411#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013412}
13413
13414/**
13415 * xmlCleanupParser:
13416 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013417 * This function name is somewhat misleading. It does not clean up
13418 * parser state, it cleans up memory allocated by the library itself.
13419 * It is a cleanup function for the XML library. It tries to reclaim all
13420 * related global memory allocated for the library processing.
13421 * It doesn't deallocate any document related memory. One should
13422 * call xmlCleanupParser() only when the process has finished using
13423 * the library and all XML/HTML documents built with it.
13424 * See also xmlInitParser() which has the opposite function of preparing
13425 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013426 */
13427
13428void
13429xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013430 if (!xmlParserInitialized)
13431 return;
13432
Owen Taylor3473f882001-02-23 17:55:21 +000013433 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013434#ifdef LIBXML_CATALOG_ENABLED
13435 xmlCatalogCleanup();
13436#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013437 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013438 xmlCleanupInputCallbacks();
13439#ifdef LIBXML_OUTPUT_ENABLED
13440 xmlCleanupOutputCallbacks();
13441#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013442#ifdef LIBXML_SCHEMAS_ENABLED
13443 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013444 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013445#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013446 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013447 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013448 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013449 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013450 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013451}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013452
13453/************************************************************************
13454 * *
13455 * New set (2.6.0) of simpler and more flexible APIs *
13456 * *
13457 ************************************************************************/
13458
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the
 * macro is expanded.
 */
#define DICT_FREE(str)						\
    if (((str) != NULL) &&					\
        ((dict == NULL) ||					\
         (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
        xmlFree((char *)(str));
13470
13471/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013472 * xmlCtxtReset:
13473 * @ctxt: an XML parser context
13474 *
13475 * Reset a parser context
13476 */
13477void
13478xmlCtxtReset(xmlParserCtxtPtr ctxt)
13479{
13480 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013481 xmlDictPtr dict;
13482
13483 if (ctxt == NULL)
13484 return;
13485
13486 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013487
13488 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13489 xmlFreeInputStream(input);
13490 }
13491 ctxt->inputNr = 0;
13492 ctxt->input = NULL;
13493
13494 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013495 if (ctxt->spaceTab != NULL) {
13496 ctxt->spaceTab[0] = -1;
13497 ctxt->space = &ctxt->spaceTab[0];
13498 } else {
13499 ctxt->space = NULL;
13500 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013501
13502
13503 ctxt->nodeNr = 0;
13504 ctxt->node = NULL;
13505
13506 ctxt->nameNr = 0;
13507 ctxt->name = NULL;
13508
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013509 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013510 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013511 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013512 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013513 DICT_FREE(ctxt->directory);
13514 ctxt->directory = NULL;
13515 DICT_FREE(ctxt->extSubURI);
13516 ctxt->extSubURI = NULL;
13517 DICT_FREE(ctxt->extSubSystem);
13518 ctxt->extSubSystem = NULL;
13519 if (ctxt->myDoc != NULL)
13520 xmlFreeDoc(ctxt->myDoc);
13521 ctxt->myDoc = NULL;
13522
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013523 ctxt->standalone = -1;
13524 ctxt->hasExternalSubset = 0;
13525 ctxt->hasPErefs = 0;
13526 ctxt->html = 0;
13527 ctxt->external = 0;
13528 ctxt->instate = XML_PARSER_START;
13529 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013530
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013531 ctxt->wellFormed = 1;
13532 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013533 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013534 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013535#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013536 ctxt->vctxt.userData = ctxt;
13537 ctxt->vctxt.error = xmlParserValidityError;
13538 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013539#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013540 ctxt->record_info = 0;
13541 ctxt->nbChars = 0;
13542 ctxt->checkIndex = 0;
13543 ctxt->inSubset = 0;
13544 ctxt->errNo = XML_ERR_OK;
13545 ctxt->depth = 0;
13546 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13547 ctxt->catalogs = NULL;
13548 xmlInitNodeInfoSeq(&ctxt->node_seq);
13549
13550 if (ctxt->attsDefault != NULL) {
13551 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13552 ctxt->attsDefault = NULL;
13553 }
13554 if (ctxt->attsSpecial != NULL) {
13555 xmlHashFree(ctxt->attsSpecial, NULL);
13556 ctxt->attsSpecial = NULL;
13557 }
13558
Daniel Veillard4432df22003-09-28 18:58:27 +000013559#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013560 if (ctxt->catalogs != NULL)
13561 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013562#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013563 if (ctxt->lastError.code != XML_ERR_OK)
13564 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013565}
13566
13567/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013568 * xmlCtxtResetPush:
13569 * @ctxt: an XML parser context
13570 * @chunk: a pointer to an array of chars
13571 * @size: number of chars in the array
13572 * @filename: an optional file name or URI
13573 * @encoding: the document encoding, or NULL
13574 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013575 * Reset a push parser context
13576 *
13577 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013578 */
13579int
13580xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13581 int size, const char *filename, const char *encoding)
13582{
13583 xmlParserInputPtr inputStream;
13584 xmlParserInputBufferPtr buf;
13585 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13586
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013587 if (ctxt == NULL)
13588 return(1);
13589
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013590 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13591 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13592
13593 buf = xmlAllocParserInputBuffer(enc);
13594 if (buf == NULL)
13595 return(1);
13596
13597 if (ctxt == NULL) {
13598 xmlFreeParserInputBuffer(buf);
13599 return(1);
13600 }
13601
13602 xmlCtxtReset(ctxt);
13603
13604 if (ctxt->pushTab == NULL) {
13605 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13606 sizeof(xmlChar *));
13607 if (ctxt->pushTab == NULL) {
13608 xmlErrMemory(ctxt, NULL);
13609 xmlFreeParserInputBuffer(buf);
13610 return(1);
13611 }
13612 }
13613
13614 if (filename == NULL) {
13615 ctxt->directory = NULL;
13616 } else {
13617 ctxt->directory = xmlParserGetDirectory(filename);
13618 }
13619
13620 inputStream = xmlNewInputStream(ctxt);
13621 if (inputStream == NULL) {
13622 xmlFreeParserInputBuffer(buf);
13623 return(1);
13624 }
13625
13626 if (filename == NULL)
13627 inputStream->filename = NULL;
13628 else
13629 inputStream->filename = (char *)
13630 xmlCanonicPath((const xmlChar *) filename);
13631 inputStream->buf = buf;
13632 inputStream->base = inputStream->buf->buffer->content;
13633 inputStream->cur = inputStream->buf->buffer->content;
13634 inputStream->end =
13635 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13636
13637 inputPush(ctxt, inputStream);
13638
13639 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13640 (ctxt->input->buf != NULL)) {
13641 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13642 int cur = ctxt->input->cur - ctxt->input->base;
13643
13644 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13645
13646 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13647 ctxt->input->cur = ctxt->input->base + cur;
13648 ctxt->input->end =
13649 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13650 use];
13651#ifdef DEBUG_PUSH
13652 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13653#endif
13654 }
13655
13656 if (encoding != NULL) {
13657 xmlCharEncodingHandlerPtr hdlr;
13658
13659 hdlr = xmlFindCharEncodingHandler(encoding);
13660 if (hdlr != NULL) {
13661 xmlSwitchToEncoding(ctxt, hdlr);
13662 } else {
13663 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13664 "Unsupported encoding %s\n", BAD_CAST encoding);
13665 }
13666 } else if (enc != XML_CHAR_ENCODING_NONE) {
13667 xmlSwitchEncoding(ctxt, enc);
13668 }
13669
13670 return(0);
13671}
13672
13673/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013674 * xmlCtxtUseOptions:
13675 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013676 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013677 *
13678 * Applies the options to the parser context
13679 *
13680 * Returns 0 in case of success, the set of unknown or unimplemented options
13681 * in case of error.
13682 */
13683int
13684xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13685{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013686 if (ctxt == NULL)
13687 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013688 if (options & XML_PARSE_RECOVER) {
13689 ctxt->recovery = 1;
13690 options -= XML_PARSE_RECOVER;
13691 } else
13692 ctxt->recovery = 0;
13693 if (options & XML_PARSE_DTDLOAD) {
13694 ctxt->loadsubset = XML_DETECT_IDS;
13695 options -= XML_PARSE_DTDLOAD;
13696 } else
13697 ctxt->loadsubset = 0;
13698 if (options & XML_PARSE_DTDATTR) {
13699 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13700 options -= XML_PARSE_DTDATTR;
13701 }
13702 if (options & XML_PARSE_NOENT) {
13703 ctxt->replaceEntities = 1;
13704 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13705 options -= XML_PARSE_NOENT;
13706 } else
13707 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013708 if (options & XML_PARSE_PEDANTIC) {
13709 ctxt->pedantic = 1;
13710 options -= XML_PARSE_PEDANTIC;
13711 } else
13712 ctxt->pedantic = 0;
13713 if (options & XML_PARSE_NOBLANKS) {
13714 ctxt->keepBlanks = 0;
13715 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13716 options -= XML_PARSE_NOBLANKS;
13717 } else
13718 ctxt->keepBlanks = 1;
13719 if (options & XML_PARSE_DTDVALID) {
13720 ctxt->validate = 1;
13721 if (options & XML_PARSE_NOWARNING)
13722 ctxt->vctxt.warning = NULL;
13723 if (options & XML_PARSE_NOERROR)
13724 ctxt->vctxt.error = NULL;
13725 options -= XML_PARSE_DTDVALID;
13726 } else
13727 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013728 if (options & XML_PARSE_NOWARNING) {
13729 ctxt->sax->warning = NULL;
13730 options -= XML_PARSE_NOWARNING;
13731 }
13732 if (options & XML_PARSE_NOERROR) {
13733 ctxt->sax->error = NULL;
13734 ctxt->sax->fatalError = NULL;
13735 options -= XML_PARSE_NOERROR;
13736 }
Daniel Veillard81273902003-09-30 00:43:48 +000013737#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013738 if (options & XML_PARSE_SAX1) {
13739 ctxt->sax->startElement = xmlSAX2StartElement;
13740 ctxt->sax->endElement = xmlSAX2EndElement;
13741 ctxt->sax->startElementNs = NULL;
13742 ctxt->sax->endElementNs = NULL;
13743 ctxt->sax->initialized = 1;
13744 options -= XML_PARSE_SAX1;
13745 }
Daniel Veillard81273902003-09-30 00:43:48 +000013746#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013747 if (options & XML_PARSE_NODICT) {
13748 ctxt->dictNames = 0;
13749 options -= XML_PARSE_NODICT;
13750 } else {
13751 ctxt->dictNames = 1;
13752 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013753 if (options & XML_PARSE_NOCDATA) {
13754 ctxt->sax->cdataBlock = NULL;
13755 options -= XML_PARSE_NOCDATA;
13756 }
13757 if (options & XML_PARSE_NSCLEAN) {
13758 ctxt->options |= XML_PARSE_NSCLEAN;
13759 options -= XML_PARSE_NSCLEAN;
13760 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013761 if (options & XML_PARSE_NONET) {
13762 ctxt->options |= XML_PARSE_NONET;
13763 options -= XML_PARSE_NONET;
13764 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013765 if (options & XML_PARSE_COMPACT) {
13766 ctxt->options |= XML_PARSE_COMPACT;
13767 options -= XML_PARSE_COMPACT;
13768 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000013769 if (options & XML_PARSE_OLD10) {
13770 ctxt->options |= XML_PARSE_OLD10;
13771 options -= XML_PARSE_OLD10;
13772 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013773 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013774 return (options);
13775}
13776
13777/**
13778 * xmlDoRead:
13779 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013780 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013781 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013782 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013783 * @reuse: keep the context for reuse
13784 *
13785 * Common front-end for the xmlRead functions
13786 *
13787 * Returns the resulting document tree or NULL
13788 */
13789static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013790xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13791 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013792{
13793 xmlDocPtr ret;
13794
13795 xmlCtxtUseOptions(ctxt, options);
13796 if (encoding != NULL) {
13797 xmlCharEncodingHandlerPtr hdlr;
13798
13799 hdlr = xmlFindCharEncodingHandler(encoding);
13800 if (hdlr != NULL)
13801 xmlSwitchToEncoding(ctxt, hdlr);
13802 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013803 if ((URL != NULL) && (ctxt->input != NULL) &&
13804 (ctxt->input->filename == NULL))
13805 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013806 xmlParseDocument(ctxt);
13807 if ((ctxt->wellFormed) || ctxt->recovery)
13808 ret = ctxt->myDoc;
13809 else {
13810 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013811 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013812 xmlFreeDoc(ctxt->myDoc);
13813 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013814 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013815 ctxt->myDoc = NULL;
13816 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013817 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013818 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013819
13820 return (ret);
13821}
13822
13823/**
13824 * xmlReadDoc:
13825 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013826 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013827 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013828 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013829 *
13830 * parse an XML in-memory document and build a tree.
13831 *
13832 * Returns the resulting document tree
13833 */
13834xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013835xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013836{
13837 xmlParserCtxtPtr ctxt;
13838
13839 if (cur == NULL)
13840 return (NULL);
13841
13842 ctxt = xmlCreateDocParserCtxt(cur);
13843 if (ctxt == NULL)
13844 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013845 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013846}
13847
13848/**
13849 * xmlReadFile:
13850 * @filename: a file or URL
13851 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013852 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013853 *
13854 * parse an XML file from the filesystem or the network.
13855 *
13856 * Returns the resulting document tree
13857 */
13858xmlDocPtr
13859xmlReadFile(const char *filename, const char *encoding, int options)
13860{
13861 xmlParserCtxtPtr ctxt;
13862
Daniel Veillard61b93382003-11-03 14:28:31 +000013863 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013864 if (ctxt == NULL)
13865 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013866 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013867}
13868
13869/**
13870 * xmlReadMemory:
13871 * @buffer: a pointer to a char array
13872 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013873 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013874 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013875 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013876 *
13877 * parse an XML in-memory document and build a tree.
13878 *
13879 * Returns the resulting document tree
13880 */
13881xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013882xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013883{
13884 xmlParserCtxtPtr ctxt;
13885
13886 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13887 if (ctxt == NULL)
13888 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013889 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013890}
13891
13892/**
13893 * xmlReadFd:
13894 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013895 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013896 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013897 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013898 *
13899 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013900 * NOTE that the file descriptor will not be closed when the
13901 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013902 *
13903 * Returns the resulting document tree
13904 */
13905xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013906xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013907{
13908 xmlParserCtxtPtr ctxt;
13909 xmlParserInputBufferPtr input;
13910 xmlParserInputPtr stream;
13911
13912 if (fd < 0)
13913 return (NULL);
13914
13915 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13916 if (input == NULL)
13917 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013918 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013919 ctxt = xmlNewParserCtxt();
13920 if (ctxt == NULL) {
13921 xmlFreeParserInputBuffer(input);
13922 return (NULL);
13923 }
13924 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13925 if (stream == NULL) {
13926 xmlFreeParserInputBuffer(input);
13927 xmlFreeParserCtxt(ctxt);
13928 return (NULL);
13929 }
13930 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013931 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013932}
13933
13934/**
13935 * xmlReadIO:
13936 * @ioread: an I/O read function
13937 * @ioclose: an I/O close function
13938 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013939 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013940 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013941 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013942 *
13943 * parse an XML document from I/O functions and source and build a tree.
13944 *
13945 * Returns the resulting document tree
13946 */
13947xmlDocPtr
13948xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013949 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013950{
13951 xmlParserCtxtPtr ctxt;
13952 xmlParserInputBufferPtr input;
13953 xmlParserInputPtr stream;
13954
13955 if (ioread == NULL)
13956 return (NULL);
13957
13958 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13959 XML_CHAR_ENCODING_NONE);
13960 if (input == NULL)
13961 return (NULL);
13962 ctxt = xmlNewParserCtxt();
13963 if (ctxt == NULL) {
13964 xmlFreeParserInputBuffer(input);
13965 return (NULL);
13966 }
13967 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13968 if (stream == NULL) {
13969 xmlFreeParserInputBuffer(input);
13970 xmlFreeParserCtxt(ctxt);
13971 return (NULL);
13972 }
13973 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013974 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013975}
13976
13977/**
13978 * xmlCtxtReadDoc:
13979 * @ctxt: an XML parser context
13980 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013981 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013982 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013983 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013984 *
13985 * parse an XML in-memory document and build a tree.
13986 * This reuses the existing @ctxt parser context
13987 *
13988 * Returns the resulting document tree
13989 */
13990xmlDocPtr
13991xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013992 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013993{
13994 xmlParserInputPtr stream;
13995
13996 if (cur == NULL)
13997 return (NULL);
13998 if (ctxt == NULL)
13999 return (NULL);
14000
14001 xmlCtxtReset(ctxt);
14002
14003 stream = xmlNewStringInputStream(ctxt, cur);
14004 if (stream == NULL) {
14005 return (NULL);
14006 }
14007 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014008 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014009}
14010
14011/**
14012 * xmlCtxtReadFile:
14013 * @ctxt: an XML parser context
14014 * @filename: a file or URL
14015 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014016 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014017 *
14018 * parse an XML file from the filesystem or the network.
14019 * This reuses the existing @ctxt parser context
14020 *
14021 * Returns the resulting document tree
14022 */
14023xmlDocPtr
14024xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14025 const char *encoding, int options)
14026{
14027 xmlParserInputPtr stream;
14028
14029 if (filename == NULL)
14030 return (NULL);
14031 if (ctxt == NULL)
14032 return (NULL);
14033
14034 xmlCtxtReset(ctxt);
14035
Daniel Veillard29614c72004-11-26 10:47:26 +000014036 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014037 if (stream == NULL) {
14038 return (NULL);
14039 }
14040 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014041 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014042}
14043
14044/**
14045 * xmlCtxtReadMemory:
14046 * @ctxt: an XML parser context
14047 * @buffer: a pointer to a char array
14048 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014049 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014050 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014051 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014052 *
14053 * parse an XML in-memory document and build a tree.
14054 * This reuses the existing @ctxt parser context
14055 *
14056 * Returns the resulting document tree
14057 */
14058xmlDocPtr
14059xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014060 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014061{
14062 xmlParserInputBufferPtr input;
14063 xmlParserInputPtr stream;
14064
14065 if (ctxt == NULL)
14066 return (NULL);
14067 if (buffer == NULL)
14068 return (NULL);
14069
14070 xmlCtxtReset(ctxt);
14071
14072 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14073 if (input == NULL) {
14074 return(NULL);
14075 }
14076
14077 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14078 if (stream == NULL) {
14079 xmlFreeParserInputBuffer(input);
14080 return(NULL);
14081 }
14082
14083 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014084 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014085}
14086
14087/**
14088 * xmlCtxtReadFd:
14089 * @ctxt: an XML parser context
14090 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014091 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014092 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014093 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014094 *
14095 * parse an XML from a file descriptor and build a tree.
14096 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014097 * NOTE that the file descriptor will not be closed when the
14098 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014099 *
14100 * Returns the resulting document tree
14101 */
14102xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014103xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14104 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014105{
14106 xmlParserInputBufferPtr input;
14107 xmlParserInputPtr stream;
14108
14109 if (fd < 0)
14110 return (NULL);
14111 if (ctxt == NULL)
14112 return (NULL);
14113
14114 xmlCtxtReset(ctxt);
14115
14116
14117 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14118 if (input == NULL)
14119 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014120 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014121 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14122 if (stream == NULL) {
14123 xmlFreeParserInputBuffer(input);
14124 return (NULL);
14125 }
14126 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014127 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014128}
14129
14130/**
14131 * xmlCtxtReadIO:
14132 * @ctxt: an XML parser context
14133 * @ioread: an I/O read function
14134 * @ioclose: an I/O close function
14135 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014136 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014137 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014138 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014139 *
14140 * parse an XML document from I/O functions and source and build a tree.
14141 * This reuses the existing @ctxt parser context
14142 *
14143 * Returns the resulting document tree
14144 */
14145xmlDocPtr
14146xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14147 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014148 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014149 const char *encoding, int options)
14150{
14151 xmlParserInputBufferPtr input;
14152 xmlParserInputPtr stream;
14153
14154 if (ioread == NULL)
14155 return (NULL);
14156 if (ctxt == NULL)
14157 return (NULL);
14158
14159 xmlCtxtReset(ctxt);
14160
14161 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14162 XML_CHAR_ENCODING_NONE);
14163 if (input == NULL)
14164 return (NULL);
14165 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14166 if (stream == NULL) {
14167 xmlFreeParserInputBuffer(input);
14168 return (NULL);
14169 }
14170 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014171 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014172}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014173
14174#define bottom_parser
14175#include "elfgcchack.h"