blob: a3915dc08e12c36203d23b74d4eade1e7100f768 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is a safety boundary, not a limitation of the parser
 * itself.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

/* Buffer sizes (large and regular variants). */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard37334572008-07-31 08:20:02 +0000119static int
120xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
121 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000122#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000123static void
124xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
125 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000126#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000127
Daniel Veillard7d515752003-09-26 19:12:37 +0000128static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000129xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
130 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000131
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000132static int
133xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
134
Daniel Veillarde57ec792003-09-10 10:50:59 +0000135/************************************************************************
136 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000137 * Some factorized error routines *
138 * *
139 ************************************************************************/
140
141/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000142 * xmlErrAttributeDup:
143 * @ctxt: an XML parser context
144 * @prefix: the attribute prefix
145 * @localname: the attribute localname
146 *
147 * Handle a redefinition of attribute error
148 */
149static void
150xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
151 const xmlChar * localname)
152{
Daniel Veillard157fee02003-10-31 10:36:03 +0000153 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
154 (ctxt->instate == XML_PARSER_EOF))
155 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000156 if (ctxt != NULL)
157 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000158 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000159 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000160 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
161 (const char *) localname, NULL, NULL, 0, 0,
162 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000163 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000164 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000165 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
166 (const char *) prefix, (const char *) localname,
167 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
168 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000169 if (ctxt != NULL) {
170 ctxt->wellFormed = 0;
171 if (ctxt->recovery == 0)
172 ctxt->disableSAX = 1;
173 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174}
175
176/**
177 * xmlFatalErr:
178 * @ctxt: an XML parser context
179 * @error: the error number
180 * @extra: extra information string
181 *
182 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
183 */
184static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186{
187 const char *errmsg;
188
Daniel Veillard157fee02003-10-31 10:36:03 +0000189 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
190 (ctxt->instate == XML_PARSER_EOF))
191 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000192 switch (error) {
193 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid hexadecimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid decimal value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "CharRef: invalid value\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "internal error";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference at end of document\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in prolog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference in epilog\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: no name\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReference: expecting ';'\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "Detected an entity reference loop\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "PEReferences forbidden in internal subset\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "EntityValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "AttValue: \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unescaped '<' not allowed in attributes values\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SystemLiteral \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Unfinished System or Public ID \" or ' expected\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Sequence ']]>' not allowed in content\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "PUBLIC, the Public Identifier is missing\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Comment must not contain '--' (double-hyphen)\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "xmlParsePI : no target name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Invalid PI name\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "NOTATION: Name expected here\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'>' required to close NOTATION declaration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Entity value required\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "Fragment not allowed";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "'(' required to start ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "NmToken expected in ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "')' required to finish ATTLIST enumeration\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : Name or '(' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg =
297 "PEReference: forbidden within markup decl in internal subset\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "expected '>'\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "XML conditional section '[' expected\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "Content error in the external subset\n";
307 break;
308 case XML_ERR_CONDSEC_INVALID_KEYWORD:
309 errmsg =
310 "conditional section INCLUDE or IGNORE keyword expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "XML conditional section not closed\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Text declaration '<?xml' required\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "parsing XML declaration: '?>' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "external parsed entities cannot be standalone\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityRef: expecting ';'\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "DOCTYPE improperly terminated\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "EndTag: '</' not found\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "expected '='\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not closed expecting \" or '\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "String not started expecting ' or \"\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Invalid XML encoding name\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "standalone accepts only 'yes' or 'no'\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Document is empty\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Extra content at the end of the document\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "chunk is not well balanced\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "extra content at the end of well balanced chunk\n";
359 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000360 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "Malformed declaration expecting version\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 case:
365 errmsg = "\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 default:
369 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000371 if (ctxt != NULL)
372 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000373 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
375 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000376 if (ctxt != NULL) {
377 ctxt->wellFormed = 0;
378 if (ctxt->recovery == 0)
379 ctxt->disableSAX = 1;
380 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381}
382
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383/**
384 * xmlFatalErrMsg:
385 * @ctxt: an XML parser context
386 * @error: the error number
387 * @msg: the error message
388 *
389 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
390 */
391static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
393 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000394{
Daniel Veillard157fee02003-10-31 10:36:03 +0000395 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396 (ctxt->instate == XML_PARSER_EOF))
397 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000398 if (ctxt != NULL)
399 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000400 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000402 if (ctxt != NULL) {
403 ctxt->wellFormed = 0;
404 if (ctxt->recovery == 0)
405 ctxt->disableSAX = 1;
406 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000407}
408
409/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 * xmlWarningMsg:
411 * @ctxt: an XML parser context
412 * @error: the error number
413 * @msg: the error message
414 * @str1: extra data
415 * @str2: extra data
416 *
417 * Handle a warning.
418 */
419static void
420xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
421 const char *msg, const xmlChar *str1, const xmlChar *str2)
422{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000423 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000424
Daniel Veillard157fee02003-10-31 10:36:03 +0000425 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
426 (ctxt->instate == XML_PARSER_EOF))
427 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000428 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
429 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000430 schannel = ctxt->sax->serror;
431 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000432 (ctxt->sax) ? ctxt->sax->warning : NULL,
433 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000434 ctxt, NULL, XML_FROM_PARSER, error,
435 XML_ERR_WARNING, NULL, 0,
436 (const char *) str1, (const char *) str2, NULL, 0, 0,
437 msg, (const char *) str1, (const char *) str2);
438}
439
440/**
441 * xmlValidityError:
442 * @ctxt: an XML parser context
443 * @error: the error number
444 * @msg: the error message
445 * @str1: extra data
446 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000447 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000448 */
449static void
450xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000451 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000452{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000453 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000454
455 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
456 (ctxt->instate == XML_PARSER_EOF))
457 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000458 if (ctxt != NULL) {
459 ctxt->errNo = error;
460 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
461 schannel = ctxt->sax->serror;
462 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000463 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000464 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000465 ctxt, NULL, XML_FROM_DTD, error,
466 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000467 (const char *) str2, NULL, 0, 0,
468 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000469 if (ctxt != NULL) {
470 ctxt->valid = 0;
471 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000472}
473
474/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000475 * xmlFatalErrMsgInt:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 * @val: an integer value
480 *
481 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
482 */
483static void
484xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000486{
Daniel Veillard157fee02003-10-31 10:36:03 +0000487 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
488 (ctxt->instate == XML_PARSER_EOF))
489 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000490 if (ctxt != NULL)
491 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000492 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000493 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
494 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000495 if (ctxt != NULL) {
496 ctxt->wellFormed = 0;
497 if (ctxt->recovery == 0)
498 ctxt->disableSAX = 1;
499 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000500}
501
502/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000503 * xmlFatalErrMsgStrIntStr:
504 * @ctxt: an XML parser context
505 * @error: the error number
506 * @msg: the error message
507 * @str1: an string info
508 * @val: an integer value
509 * @str2: an string info
510 *
511 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
512 */
513static void
514xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
515 const char *msg, const xmlChar *str1, int val,
516 const xmlChar *str2)
517{
Daniel Veillard157fee02003-10-31 10:36:03 +0000518 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
519 (ctxt->instate == XML_PARSER_EOF))
520 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL)
522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000524 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) str1, (const char *) str2,
526 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000527 if (ctxt != NULL) {
528 ctxt->wellFormed = 0;
529 if (ctxt->recovery == 0)
530 ctxt->disableSAX = 1;
531 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000532}
533
534/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000535 * xmlFatalErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
542 */
543static void
544xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL)
551 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000553 XML_FROM_PARSER, error, XML_ERR_FATAL,
554 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
555 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000556 if (ctxt != NULL) {
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
560 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000561}
562
563/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000564 * xmlErrMsgStr:
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
568 * @val: a string value
569 *
570 * Handle a non fatal parser error
571 */
572static void
573xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
574 const char *msg, const xmlChar * val)
575{
Daniel Veillard157fee02003-10-31 10:36:03 +0000576 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
577 (ctxt->instate == XML_PARSER_EOF))
578 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000579 if (ctxt != NULL)
580 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000582 XML_FROM_PARSER, error, XML_ERR_ERROR,
583 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
584 val);
585}
586
587/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000588 * xmlNsErr:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the message
592 * @info1: extra information string
593 * @info2: extra information string
594 *
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596 */
597static void
598xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000600 const xmlChar * info1, const xmlChar * info2,
601 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000602{
Daniel Veillard157fee02003-10-31 10:36:03 +0000603 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
604 (ctxt->instate == XML_PARSER_EOF))
605 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000608 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000609 XML_ERR_ERROR, NULL, 0, (const char *) info1,
610 (const char *) info2, (const char *) info3, 0, 0, msg,
611 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000612 if (ctxt != NULL)
613 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000614}
615
Daniel Veillard37334572008-07-31 08:20:02 +0000616/**
617 * xmlNsWarn
618 * @ctxt: an XML parser context
619 * @error: the error number
620 * @msg: the message
621 * @info1: extra information string
622 * @info2: extra information string
623 *
624 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
625 */
626static void
627xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
628 const char *msg,
629 const xmlChar * info1, const xmlChar * info2,
630 const xmlChar * info3)
631{
632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633 (ctxt->instate == XML_PARSER_EOF))
634 return;
635 if (ctxt != NULL)
636 ctxt->errNo = error;
637 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
638 XML_ERR_WARNING, NULL, 0, (const char *) info1,
639 (const char *) info2, (const char *) info3, 0, 0, msg,
640 info1, info2, info3);
641}
642
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000643/************************************************************************
644 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000645 * Library wide options *
646 * *
647 ************************************************************************/
648
649/**
650 * xmlHasFeature:
651 * @feature: the feature to be examined
652 *
653 * Examines if the library has been compiled with a given feature.
654 *
655 * Returns a non-zero value if the library has been compiled with the
656 * given feature, and zero (0) if the feature does not exist or an
657 * unknown feature is requested.
658 */
659int
660xmlHasFeature(xmlFeature feature)
661{
662 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_THREAD_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_TREE_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_OUTPUT_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_PUSH_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_READER_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_PATTERN_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_WRITER_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_SAX1_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_FTP_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_HTTP_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_VALID_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_HTML_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_LEGACY_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_C14N_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_CATALOG_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_XPATH_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_XPTR_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_XINCLUDE_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_ICONV_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_ISO8859X_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_UNICODE_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_REGEXP_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_AUTOMATA_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef LIBXML_EXPR_ENABLED
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_SCHEMAS_ENABLED
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000813 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000814#ifdef LIBXML_SCHEMATRON_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000819 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000820#ifdef LIBXML_MODULES_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000825 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000826#ifdef LIBXML_DEBUG_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000831 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832#ifdef DEBUG_MEMORY_LOCATION
833 return(1);
834#else
835 return(0);
836#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000837 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838#ifdef LIBXML_DEBUG_RUNTIME
839 return(1);
840#else
841 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000842#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000843 case XML_WITH_ZLIB:
844#ifdef LIBXML_ZLIB_ENABLED
845 return(1);
846#else
847 return(0);
848#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849 default:
850 break;
851 }
852 return(0);
853}
854
855/************************************************************************
856 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857 * SAX2 defaulted attributes handling *
858 * *
859 ************************************************************************/
860
861/**
862 * xmlDetectSAX2:
863 * @ctxt: an XML parser context
864 *
865 * Do the SAX2 detection and specific intialization
866 */
867static void
868xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
869 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000870#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000871 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
872 ((ctxt->sax->startElementNs != NULL) ||
873 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000874#else
875 ctxt->sax2 = 1;
876#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877
878 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
879 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
880 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000881 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
882 (ctxt->str_xml_ns == NULL)) {
883 xmlErrMemory(ctxt, NULL);
884 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000885}
886
Daniel Veillarde57ec792003-09-10 10:50:59 +0000887typedef struct _xmlDefAttrs xmlDefAttrs;
888typedef xmlDefAttrs *xmlDefAttrsPtr;
889struct _xmlDefAttrs {
890 int nbAttrs; /* number of defaulted attributes on that element */
891 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000892 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000893};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894
895/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000896 * xmlAttrNormalizeSpace:
897 * @src: the source string
898 * @dst: the target string
899 *
900 * Normalize the space in non CDATA attribute values:
901 * If the attribute type is not CDATA, then the XML processor MUST further
902 * process the normalized attribute value by discarding any leading and
903 * trailing space (#x20) characters, and by replacing sequences of space
904 * (#x20) characters by a single space (#x20) character.
905 * Note that the size of dst need to be at least src, and if one doesn't need
906 * to preserve dst (and it doesn't come from a dictionary or read-only) then
907 * passing src as dst is just fine.
908 *
909 * Returns a pointer to the normalized value (dst) or NULL if no conversion
910 * is needed.
911 */
912static xmlChar *
913xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
914{
915 if ((src == NULL) || (dst == NULL))
916 return(NULL);
917
918 while (*src == 0x20) src++;
919 while (*src != 0) {
920 if (*src == 0x20) {
921 while (*src == 0x20) src++;
922 if (*src != 0)
923 *dst++ = 0x20;
924 } else {
925 *dst++ = *src++;
926 }
927 }
928 *dst = 0;
929 if (dst == src)
930 return(NULL);
931 return(dst);
932}
933
934/**
935 * xmlAttrNormalizeSpace2:
936 * @src: the source string
937 *
938 * Normalize the space in non CDATA attribute values, a slightly more complex
939 * front end to avoid allocation problems when running on attribute values
940 * coming from the input.
941 *
942 * Returns a pointer to the normalized value (dst) or NULL if no conversion
943 * is needed.
944 */
945static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +0000946xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000947{
948 int i;
949 int remove_head = 0;
950 int need_realloc = 0;
951 const xmlChar *cur;
952
953 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
954 return(NULL);
955 i = *len;
956 if (i <= 0)
957 return(NULL);
958
959 cur = src;
960 while (*cur == 0x20) {
961 cur++;
962 remove_head++;
963 }
964 while (*cur != 0) {
965 if (*cur == 0x20) {
966 cur++;
967 if ((*cur == 0x20) || (*cur == 0)) {
968 need_realloc = 1;
969 break;
970 }
971 } else
972 cur++;
973 }
974 if (need_realloc) {
975 xmlChar *ret;
976
977 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
978 if (ret == NULL) {
979 xmlErrMemory(ctxt, NULL);
980 return(NULL);
981 }
982 xmlAttrNormalizeSpace(ret, ret);
983 *len = (int) strlen((const char *)ret);
984 return(ret);
985 } else if (remove_head) {
986 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +0000987 memmove(src, src + remove_head, 1 + *len);
988 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000989 }
990 return(NULL);
991}
992
993/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000994 * xmlAddDefAttrs:
995 * @ctxt: an XML parser context
996 * @fullname: the element fullname
997 * @fullattr: the attribute fullname
998 * @value: the attribute value
999 *
1000 * Add a defaulted attribute for an element
1001 */
1002static void
1003xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1004 const xmlChar *fullname,
1005 const xmlChar *fullattr,
1006 const xmlChar *value) {
1007 xmlDefAttrsPtr defaults;
1008 int len;
1009 const xmlChar *name;
1010 const xmlChar *prefix;
1011
Daniel Veillard6a31b832008-03-26 14:06:44 +00001012 /*
1013 * Allows to detect attribute redefinitions
1014 */
1015 if (ctxt->attsSpecial != NULL) {
1016 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1017 return;
1018 }
1019
Daniel Veillarde57ec792003-09-10 10:50:59 +00001020 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001021 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001022 if (ctxt->attsDefault == NULL)
1023 goto mem_error;
1024 }
1025
1026 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001027 * split the element name into prefix:localname , the string found
1028 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001029 */
1030 name = xmlSplitQName3(fullname, &len);
1031 if (name == NULL) {
1032 name = xmlDictLookup(ctxt->dict, fullname, -1);
1033 prefix = NULL;
1034 } else {
1035 name = xmlDictLookup(ctxt->dict, name, -1);
1036 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1037 }
1038
1039 /*
1040 * make sure there is some storage
1041 */
1042 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1043 if (defaults == NULL) {
1044 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001045 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001046 if (defaults == NULL)
1047 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001049 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001050 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1051 defaults, NULL) < 0) {
1052 xmlFree(defaults);
1053 goto mem_error;
1054 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001055 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001056 xmlDefAttrsPtr temp;
1057
1058 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001059 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001060 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001061 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001062 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001063 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001064 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1065 defaults, NULL) < 0) {
1066 xmlFree(defaults);
1067 goto mem_error;
1068 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001069 }
1070
1071 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001072 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001073 * are within the DTD and hen not associated to namespace names.
1074 */
1075 name = xmlSplitQName3(fullattr, &len);
1076 if (name == NULL) {
1077 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1078 prefix = NULL;
1079 } else {
1080 name = xmlDictLookup(ctxt->dict, name, -1);
1081 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1082 }
1083
Daniel Veillardae0765b2008-07-31 19:54:59 +00001084 defaults->values[5 * defaults->nbAttrs] = name;
1085 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 /* intern the string and precompute the end */
1087 len = xmlStrlen(value);
1088 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001089 defaults->values[5 * defaults->nbAttrs + 2] = value;
1090 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1091 if (ctxt->external)
1092 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1093 else
1094 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 defaults->nbAttrs++;
1096
1097 return;
1098
1099mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001100 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001101 return;
1102}
1103
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001104/**
1105 * xmlAddSpecialAttr:
1106 * @ctxt: an XML parser context
1107 * @fullname: the element fullname
1108 * @fullattr: the attribute fullname
1109 * @type: the attribute type
1110 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001111 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001112 */
1113static void
1114xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1115 const xmlChar *fullname,
1116 const xmlChar *fullattr,
1117 int type)
1118{
1119 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001120 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001121 if (ctxt->attsSpecial == NULL)
1122 goto mem_error;
1123 }
1124
Daniel Veillardac4118d2008-01-11 05:27:32 +00001125 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1126 return;
1127
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001128 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1129 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001130 return;
1131
1132mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001133 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001134 return;
1135}
1136
Daniel Veillard4432df22003-09-28 18:58:27 +00001137/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001138 * xmlCleanSpecialAttrCallback:
1139 *
1140 * Removes CDATA attributes from the special attribute table
1141 */
1142static void
1143xmlCleanSpecialAttrCallback(void *payload, void *data,
1144 const xmlChar *fullname, const xmlChar *fullattr,
1145 const xmlChar *unused ATTRIBUTE_UNUSED) {
1146 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1147
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001148 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001149 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1150 }
1151}
1152
1153/**
1154 * xmlCleanSpecialAttr:
1155 * @ctxt: an XML parser context
1156 *
1157 * Trim the list of attributes defined to remove all those of type
1158 * CDATA as they are not special. This call should be done when finishing
1159 * to parse the DTD and before starting to parse the document root.
1160 */
1161static void
1162xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1163{
1164 if (ctxt->attsSpecial == NULL)
1165 return;
1166
1167 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1168
1169 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1170 xmlHashFree(ctxt->attsSpecial, NULL);
1171 ctxt->attsSpecial = NULL;
1172 }
1173 return;
1174}
1175
1176/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001177 * xmlCheckLanguageID:
1178 * @lang: pointer to the string value
1179 *
1180 * Checks that the value conforms to the LanguageID production:
1181 *
1182 * NOTE: this is somewhat deprecated, those productions were removed from
1183 * the XML Second edition.
1184 *
1185 * [33] LanguageID ::= Langcode ('-' Subcode)*
1186 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1187 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1188 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1189 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1190 * [38] Subcode ::= ([a-z] | [A-Z])+
1191 *
1192 * Returns 1 if correct 0 otherwise
1193 **/
1194int
1195xmlCheckLanguageID(const xmlChar * lang)
1196{
1197 const xmlChar *cur = lang;
1198
1199 if (cur == NULL)
1200 return (0);
1201 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1202 ((cur[0] == 'I') && (cur[1] == '-'))) {
1203 /*
1204 * IANA code
1205 */
1206 cur += 2;
1207 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1208 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1209 cur++;
1210 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1211 ((cur[0] == 'X') && (cur[1] == '-'))) {
1212 /*
1213 * User code
1214 */
1215 cur += 2;
1216 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1217 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1218 cur++;
1219 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1220 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1221 /*
1222 * ISO639
1223 */
1224 cur++;
1225 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1226 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1227 cur++;
1228 else
1229 return (0);
1230 } else
1231 return (0);
1232 while (cur[0] != 0) { /* non input consuming */
1233 if (cur[0] != '-')
1234 return (0);
1235 cur++;
1236 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1237 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1238 cur++;
1239 else
1240 return (0);
1241 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1242 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1243 cur++;
1244 }
1245 return (1);
1246}
1247
Owen Taylor3473f882001-02-23 17:55:21 +00001248/************************************************************************
1249 * *
1250 * Parser stacks related functions and macros *
1251 * *
1252 ************************************************************************/
1253
1254xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1255 const xmlChar ** str);
1256
Daniel Veillard0fb18932003-09-07 09:14:37 +00001257#ifdef SAX2
1258/**
1259 * nsPush:
1260 * @ctxt: an XML parser context
1261 * @prefix: the namespace prefix or NULL
1262 * @URL: the namespace name
1263 *
1264 * Pushes a new parser namespace on top of the ns stack
1265 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001266 * Returns -1 in case of error, -2 if the namespace should be discarded
1267 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001268 */
1269static int
1270nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1271{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001272 if (ctxt->options & XML_PARSE_NSCLEAN) {
1273 int i;
1274 for (i = 0;i < ctxt->nsNr;i += 2) {
1275 if (ctxt->nsTab[i] == prefix) {
1276 /* in scope */
1277 if (ctxt->nsTab[i + 1] == URL)
1278 return(-2);
1279 /* out of scope keep it */
1280 break;
1281 }
1282 }
1283 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001284 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1285 ctxt->nsMax = 10;
1286 ctxt->nsNr = 0;
1287 ctxt->nsTab = (const xmlChar **)
1288 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1289 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001290 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001291 ctxt->nsMax = 0;
1292 return (-1);
1293 }
1294 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001295 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001296 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001297 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1298 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1299 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001300 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001301 ctxt->nsMax /= 2;
1302 return (-1);
1303 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001304 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001305 }
1306 ctxt->nsTab[ctxt->nsNr++] = prefix;
1307 ctxt->nsTab[ctxt->nsNr++] = URL;
1308 return (ctxt->nsNr);
1309}
1310/**
1311 * nsPop:
1312 * @ctxt: an XML parser context
1313 * @nr: the number to pop
1314 *
1315 * Pops the top @nr parser prefix/namespace from the ns stack
1316 *
1317 * Returns the number of namespaces removed
1318 */
1319static int
1320nsPop(xmlParserCtxtPtr ctxt, int nr)
1321{
1322 int i;
1323
1324 if (ctxt->nsTab == NULL) return(0);
1325 if (ctxt->nsNr < nr) {
1326 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1327 nr = ctxt->nsNr;
1328 }
1329 if (ctxt->nsNr <= 0)
1330 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001331
Daniel Veillard0fb18932003-09-07 09:14:37 +00001332 for (i = 0;i < nr;i++) {
1333 ctxt->nsNr--;
1334 ctxt->nsTab[ctxt->nsNr] = NULL;
1335 }
1336 return(nr);
1337}
1338#endif
1339
1340static int
1341xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1342 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001344 int maxatts;
1345
1346 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001347 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001348 atts = (const xmlChar **)
1349 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001350 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001351 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001352 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1353 if (attallocs == NULL) goto mem_error;
1354 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001355 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 } else if (nr + 5 > ctxt->maxatts) {
1357 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001358 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1359 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001362 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1363 (maxatts / 5) * sizeof(int));
1364 if (attallocs == NULL) goto mem_error;
1365 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001366 ctxt->maxatts = maxatts;
1367 }
1368 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001369mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001370 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001371 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001372}
1373
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001374/**
1375 * inputPush:
1376 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001377 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001378 *
1379 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001380 *
1381 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001382 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001383int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1385{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001386 if ((ctxt == NULL) || (value == NULL))
1387 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001388 if (ctxt->inputNr >= ctxt->inputMax) {
1389 ctxt->inputMax *= 2;
1390 ctxt->inputTab =
1391 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1392 ctxt->inputMax *
1393 sizeof(ctxt->inputTab[0]));
1394 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001395 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 return (0);
1397 }
1398 }
1399 ctxt->inputTab[ctxt->inputNr] = value;
1400 ctxt->input = value;
1401 return (ctxt->inputNr++);
1402}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001403/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001404 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001405 * @ctxt: an XML parser context
1406 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001407 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001408 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001409 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001410 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001411xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001412inputPop(xmlParserCtxtPtr ctxt)
1413{
1414 xmlParserInputPtr ret;
1415
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001416 if (ctxt == NULL)
1417 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001418 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001419 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001420 ctxt->inputNr--;
1421 if (ctxt->inputNr > 0)
1422 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1423 else
1424 ctxt->input = NULL;
1425 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001426 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001427 return (ret);
1428}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001429/**
1430 * nodePush:
1431 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001432 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001433 *
1434 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001435 *
1436 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001437 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001438int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001439nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1440{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001441 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001442 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001443 xmlNodePtr *tmp;
1444
1445 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1446 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001447 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001448 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001449 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001450 return (0);
1451 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001452 ctxt->nodeTab = tmp;
1453 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001454 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001455 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001456 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001457 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1458 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001459 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001460 return(0);
1461 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001462 ctxt->nodeTab[ctxt->nodeNr] = value;
1463 ctxt->node = value;
1464 return (ctxt->nodeNr++);
1465}
1466/**
1467 * nodePop:
1468 * @ctxt: an XML parser context
1469 *
1470 * Pops the top element node from the node stack
1471 *
1472 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001473 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001474xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001475nodePop(xmlParserCtxtPtr ctxt)
1476{
1477 xmlNodePtr ret;
1478
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001479 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001480 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001481 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001482 ctxt->nodeNr--;
1483 if (ctxt->nodeNr > 0)
1484 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1485 else
1486 ctxt->node = NULL;
1487 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001488 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001489 return (ret);
1490}
Daniel Veillarda2351322004-06-27 12:08:10 +00001491
1492#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001493/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001494 * nameNsPush:
1495 * @ctxt: an XML parser context
1496 * @value: the element name
1497 * @prefix: the element prefix
1498 * @URI: the element namespace name
1499 *
1500 * Pushes a new element name/prefix/URL on top of the name stack
1501 *
1502 * Returns -1 in case of error, the index in the stack otherwise
1503 */
1504static int
1505nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1506 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1507{
1508 if (ctxt->nameNr >= ctxt->nameMax) {
1509 const xmlChar * *tmp;
1510 void **tmp2;
1511 ctxt->nameMax *= 2;
1512 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1513 ctxt->nameMax *
1514 sizeof(ctxt->nameTab[0]));
1515 if (tmp == NULL) {
1516 ctxt->nameMax /= 2;
1517 goto mem_error;
1518 }
1519 ctxt->nameTab = tmp;
1520 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1521 ctxt->nameMax * 3 *
1522 sizeof(ctxt->pushTab[0]));
1523 if (tmp2 == NULL) {
1524 ctxt->nameMax /= 2;
1525 goto mem_error;
1526 }
1527 ctxt->pushTab = tmp2;
1528 }
1529 ctxt->nameTab[ctxt->nameNr] = value;
1530 ctxt->name = value;
1531 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1532 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001533 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001534 return (ctxt->nameNr++);
1535mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001536 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001537 return (-1);
1538}
1539/**
1540 * nameNsPop:
1541 * @ctxt: an XML parser context
1542 *
1543 * Pops the top element/prefix/URI name from the name stack
1544 *
1545 * Returns the name just removed
1546 */
1547static const xmlChar *
1548nameNsPop(xmlParserCtxtPtr ctxt)
1549{
1550 const xmlChar *ret;
1551
1552 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001553 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001554 ctxt->nameNr--;
1555 if (ctxt->nameNr > 0)
1556 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1557 else
1558 ctxt->name = NULL;
1559 ret = ctxt->nameTab[ctxt->nameNr];
1560 ctxt->nameTab[ctxt->nameNr] = NULL;
1561 return (ret);
1562}
Daniel Veillarda2351322004-06-27 12:08:10 +00001563#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001564
1565/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001566 * namePush:
1567 * @ctxt: an XML parser context
1568 * @value: the element name
1569 *
1570 * Pushes a new element name on top of the name stack
1571 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001572 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001573 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001574int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001575namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 if (ctxt == NULL) return (-1);
1578
Daniel Veillard1c732d22002-11-30 11:22:59 +00001579 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001580 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001581 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001582 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001583 ctxt->nameMax *
1584 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001585 if (tmp == NULL) {
1586 ctxt->nameMax /= 2;
1587 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001588 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001589 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001590 }
1591 ctxt->nameTab[ctxt->nameNr] = value;
1592 ctxt->name = value;
1593 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001594mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001595 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001596 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001597}
1598/**
1599 * namePop:
1600 * @ctxt: an XML parser context
1601 *
1602 * Pops the top element name from the name stack
1603 *
1604 * Returns the name just removed
1605 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001606const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001607namePop(xmlParserCtxtPtr ctxt)
1608{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001609 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001610
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001611 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1612 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001613 ctxt->nameNr--;
1614 if (ctxt->nameNr > 0)
1615 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1616 else
1617 ctxt->name = NULL;
1618 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001619 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001620 return (ret);
1621}
Owen Taylor3473f882001-02-23 17:55:21 +00001622
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001623static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001624 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001625 int *tmp;
1626
Owen Taylor3473f882001-02-23 17:55:21 +00001627 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001628 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1629 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1630 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001631 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001632 return(0);
1633 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001634 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001635 }
1636 ctxt->spaceTab[ctxt->spaceNr] = val;
1637 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1638 return(ctxt->spaceNr++);
1639}
1640
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001641static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001642 int ret;
1643 if (ctxt->spaceNr <= 0) return(0);
1644 ctxt->spaceNr--;
1645 if (ctxt->spaceNr > 0)
1646 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1647 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001648 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001649 ret = ctxt->spaceTab[ctxt->spaceNr];
1650 ctxt->spaceTab[ctxt->spaceNr] = -1;
1651 return(ret);
1652}
1653
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1688
/* RAW and CUR both expand to the xmlChar at the current input position. */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* NXT(val): the xmlChar located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current input pointer itself. */
#define CUR_PTR ctxt->input->cur
1693
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Each macro builds on the previous one, so the comparison
 * stops at the first mismatching byte.
 *
 * Every argument is parenthesized in the expansion so that compound
 * expressions (e.g. "p + 1") are cast and compared as a whole. Note
 * that s is evaluated several times: do not pass an expression with
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1711
/*
 * SKIP(val): advance the current input by val xmlChars, bumping the
 * character counter and the column by the same amount (the line number
 * is not touched). Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and if the input is exhausted and
 * cannot be grown the current input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1719
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping line and column up to date when a newline is crossed.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and if the input is exhausted and
 * cannot be grown the current input is popped.
 *
 * val is parenthesized in the loop condition so that a compound
 * argument (e.g. a conditional expression) is compared as a whole.
 * It is re-evaluated on every iteration: do not pass an expression
 * with side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1734
Daniel Veillarda880b122003-04-21 21:36:41 +00001735#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001736 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1737 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001738 xmlSHRINK (ctxt);
1739
1740static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1741 xmlParserInputShrink(ctxt->input);
1742 if ((*ctxt->input->cur == 0) &&
1743 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1744 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001745 }
Owen Taylor3473f882001-02-23 17:55:21 +00001746
Daniel Veillarda880b122003-04-21 21:36:41 +00001747#define GROW if ((ctxt->progressive == 0) && \
1748 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001749 xmlGROW (ctxt);
1750
1751static void xmlGROW (xmlParserCtxtPtr ctxt) {
1752 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1753 if ((*ctxt->input->cur == 0) &&
1754 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1755 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001756}
Owen Taylor3473f882001-02-23 17:55:21 +00001757
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it as one column and
 * one character, then try to grow the input if a 0 byte was reached.
 * The expansion is a plain brace block (not a do/while).
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1769
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * Line/column are updated from the first byte (a newline starts a new
 * line at column 1, anything else counts as one column), then a '%' at
 * the new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
        ctxt->input->col++;						\
    } else {								\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character of the context input, length stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a character of length 1 is stored directly, anything else
 * goes through xmlCopyCharMultiByte(). The expansion is an unbraced
 * if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00001784
1785/**
1786 * xmlSkipBlankChars:
1787 * @ctxt: the XML parser context
1788 *
1789 * skip all blanks character found at that point in the input streams.
1790 * It pops up finished entities in the process if allowable at that point.
1791 *
1792 * Returns the number of space chars skipped
1793 */
1794
1795int
1796xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001797 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001798
1799 /*
1800 * It's Okay to use CUR/NEXT here since all the blanks are on
1801 * the ASCII range.
1802 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001803 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1804 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001805 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001806 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001807 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001808 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001809 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001810 if (*cur == '\n') {
1811 ctxt->input->line++; ctxt->input->col = 1;
1812 }
1813 cur++;
1814 res++;
1815 if (*cur == 0) {
1816 ctxt->input->cur = cur;
1817 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1818 cur = ctxt->input->cur;
1819 }
1820 }
1821 ctxt->input->cur = cur;
1822 } else {
1823 int cur;
1824 do {
1825 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001826 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001827 NEXT;
1828 cur = CUR;
1829 res++;
1830 }
1831 while ((cur == 0) && (ctxt->inputNr > 1) &&
1832 (ctxt->instate != XML_PARSER_COMMENT)) {
1833 xmlPopInput(ctxt);
1834 cur = CUR;
1835 }
1836 /*
1837 * Need to handle support of entities branching here
1838 */
1839 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1840 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1841 }
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(res);
1843}
1844
1845/************************************************************************
1846 * *
1847 * Commodity functions to handle entities *
1848 * *
1849 ************************************************************************/
1850
1851/**
1852 * xmlPopInput:
1853 * @ctxt: an XML parser context
1854 *
1855 * xmlPopInput: the current input pointed by ctxt->input came to an end
1856 * pop it and return the next char.
1857 *
1858 * Returns the current xmlChar in the parser context
1859 */
1860xmlChar
1861xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001862 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001863 if (xmlParserDebugEntities)
1864 xmlGenericError(xmlGenericErrorContext,
1865 "Popping input %d\n", ctxt->inputNr);
1866 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001867 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001868 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1869 return(xmlPopInput(ctxt));
1870 return(CUR);
1871}
1872
1873/**
1874 * xmlPushInput:
1875 * @ctxt: an XML parser context
1876 * @input: an XML parser input fragment (entity, XML fragment ...).
1877 *
1878 * xmlPushInput: switch to a new input stream which is stacked on top
1879 * of the previous one(s).
1880 */
1881void
1882xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1883 if (input == NULL) return;
1884
1885 if (xmlParserDebugEntities) {
1886 if ((ctxt->input != NULL) && (ctxt->input->filename))
1887 xmlGenericError(xmlGenericErrorContext,
1888 "%s(%d): ", ctxt->input->filename,
1889 ctxt->input->line);
1890 xmlGenericError(xmlGenericErrorContext,
1891 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1892 }
1893 inputPush(ctxt, input);
1894 GROW;
1895}
1896
1897/**
1898 * xmlParseCharRef:
1899 * @ctxt: an XML parser context
1900 *
1901 * parse Reference declarations
1902 *
1903 * [66] CharRef ::= '&#' [0-9]+ ';' |
1904 * '&#x' [0-9a-fA-F]+ ';'
1905 *
1906 * [ WFC: Legal Character ]
1907 * Characters referred to using character references must match the
1908 * production for Char.
1909 *
1910 * Returns the value parsed (as an int), 0 in case of error
1911 */
1912int
1913xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001914 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001915 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001916 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001917
Owen Taylor3473f882001-02-23 17:55:21 +00001918 /*
1919 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1920 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001921 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001922 (NXT(2) == 'x')) {
1923 SKIP(3);
1924 GROW;
1925 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001926 if (count++ > 20) {
1927 count = 0;
1928 GROW;
1929 }
1930 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001931 val = val * 16 + (CUR - '0');
1932 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1933 val = val * 16 + (CUR - 'a') + 10;
1934 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1935 val = val * 16 + (CUR - 'A') + 10;
1936 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001937 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001938 val = 0;
1939 break;
1940 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001941 if (val > 0x10FFFF)
1942 outofrange = val;
1943
Owen Taylor3473f882001-02-23 17:55:21 +00001944 NEXT;
1945 count++;
1946 }
1947 if (RAW == ';') {
1948 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001949 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001950 ctxt->nbChars ++;
1951 ctxt->input->cur++;
1952 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001953 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001954 SKIP(2);
1955 GROW;
1956 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001957 if (count++ > 20) {
1958 count = 0;
1959 GROW;
1960 }
1961 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001962 val = val * 10 + (CUR - '0');
1963 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001964 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001965 val = 0;
1966 break;
1967 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001968 if (val > 0x10FFFF)
1969 outofrange = val;
1970
Owen Taylor3473f882001-02-23 17:55:21 +00001971 NEXT;
1972 count++;
1973 }
1974 if (RAW == ';') {
1975 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001976 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001977 ctxt->nbChars ++;
1978 ctxt->input->cur++;
1979 }
1980 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001981 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001982 }
1983
1984 /*
1985 * [ WFC: Legal Character ]
1986 * Characters referred to using character references must match the
1987 * production for Char.
1988 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001989 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001990 return(val);
1991 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001992 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1993 "xmlParseCharRef: invalid xmlChar value %d\n",
1994 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 }
1996 return(0);
1997}
1998
1999/**
2000 * xmlParseStringCharRef:
2001 * @ctxt: an XML parser context
2002 * @str: a pointer to an index in the string
2003 *
2004 * parse Reference declarations, variant parsing from a string rather
2005 * than an an input flow.
2006 *
2007 * [66] CharRef ::= '&#' [0-9]+ ';' |
2008 * '&#x' [0-9a-fA-F]+ ';'
2009 *
2010 * [ WFC: Legal Character ]
2011 * Characters referred to using character references must match the
2012 * production for Char.
2013 *
2014 * Returns the value parsed (as an int), 0 in case of error, str will be
2015 * updated to the current value of the index
2016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002017static int
Owen Taylor3473f882001-02-23 17:55:21 +00002018xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2019 const xmlChar *ptr;
2020 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002021 unsigned int val = 0;
2022 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002023
2024 if ((str == NULL) || (*str == NULL)) return(0);
2025 ptr = *str;
2026 cur = *ptr;
2027 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2028 ptr += 3;
2029 cur = *ptr;
2030 while (cur != ';') { /* Non input consuming loop */
2031 if ((cur >= '0') && (cur <= '9'))
2032 val = val * 16 + (cur - '0');
2033 else if ((cur >= 'a') && (cur <= 'f'))
2034 val = val * 16 + (cur - 'a') + 10;
2035 else if ((cur >= 'A') && (cur <= 'F'))
2036 val = val * 16 + (cur - 'A') + 10;
2037 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002038 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002039 val = 0;
2040 break;
2041 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002042 if (val > 0x10FFFF)
2043 outofrange = val;
2044
Owen Taylor3473f882001-02-23 17:55:21 +00002045 ptr++;
2046 cur = *ptr;
2047 }
2048 if (cur == ';')
2049 ptr++;
2050 } else if ((cur == '&') && (ptr[1] == '#')){
2051 ptr += 2;
2052 cur = *ptr;
2053 while (cur != ';') { /* Non input consuming loops */
2054 if ((cur >= '0') && (cur <= '9'))
2055 val = val * 10 + (cur - '0');
2056 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002057 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002058 val = 0;
2059 break;
2060 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002061 if (val > 0x10FFFF)
2062 outofrange = val;
2063
Owen Taylor3473f882001-02-23 17:55:21 +00002064 ptr++;
2065 cur = *ptr;
2066 }
2067 if (cur == ';')
2068 ptr++;
2069 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002070 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002071 return(0);
2072 }
2073 *str = ptr;
2074
2075 /*
2076 * [ WFC: Legal Character ]
2077 * Characters referred to using character references must match the
2078 * production for Char.
2079 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002080 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002081 return(val);
2082 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002083 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2084 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2085 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002086 }
2087 return(0);
2088}
2089
2090/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002091 * xmlNewBlanksWrapperInputStream:
2092 * @ctxt: an XML parser context
2093 * @entity: an Entity pointer
2094 *
2095 * Create a new input stream for wrapping
2096 * blanks around a PEReference
2097 *
2098 * Returns the new input stream or NULL
2099 */
2100
2101static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2102
Daniel Veillardf4862f02002-09-10 11:13:43 +00002103static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002104xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2105 xmlParserInputPtr input;
2106 xmlChar *buffer;
2107 size_t length;
2108 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002109 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2110 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002111 return(NULL);
2112 }
2113 if (xmlParserDebugEntities)
2114 xmlGenericError(xmlGenericErrorContext,
2115 "new blanks wrapper for entity: %s\n", entity->name);
2116 input = xmlNewInputStream(ctxt);
2117 if (input == NULL) {
2118 return(NULL);
2119 }
2120 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002121 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002122 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002123 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002124 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002125 return(NULL);
2126 }
2127 buffer [0] = ' ';
2128 buffer [1] = '%';
2129 buffer [length-3] = ';';
2130 buffer [length-2] = ' ';
2131 buffer [length-1] = 0;
2132 memcpy(buffer + 2, entity->name, length - 5);
2133 input->free = deallocblankswrapper;
2134 input->base = buffer;
2135 input->cur = buffer;
2136 input->length = length;
2137 input->end = &buffer[length];
2138 return(input);
2139}
2140
2141/**
Owen Taylor3473f882001-02-23 17:55:21 +00002142 * xmlParserHandlePEReference:
2143 * @ctxt: the parser context
2144 *
2145 * [69] PEReference ::= '%' Name ';'
2146 *
2147 * [ WFC: No Recursion ]
2148 * A parsed entity must not contain a recursive
2149 * reference to itself, either directly or indirectly.
2150 *
2151 * [ WFC: Entity Declared ]
2152 * In a document without any DTD, a document with only an internal DTD
2153 * subset which contains no parameter entity references, or a document
2154 * with "standalone='yes'", ... ... The declaration of a parameter
2155 * entity must precede any reference to it...
2156 *
2157 * [ VC: Entity Declared ]
2158 * In a document with an external subset or external parameter entities
2159 * with "standalone='no'", ... ... The declaration of a parameter entity
2160 * must precede any reference to it...
2161 *
2162 * [ WFC: In DTD ]
2163 * Parameter-entity references may only appear in the DTD.
2164 * NOTE: misleading but this is handled.
2165 *
2166 * A PEReference may have been detected in the current input stream
2167 * the handling is done accordingly to
2168 * http://www.w3.org/TR/REC-xml#entproc
2169 * i.e.
2170 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002171 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002172 */
2173void
2174xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002175 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002176 xmlEntityPtr entity = NULL;
2177 xmlParserInputPtr input;
2178
Owen Taylor3473f882001-02-23 17:55:21 +00002179 if (RAW != '%') return;
2180 switch(ctxt->instate) {
2181 case XML_PARSER_CDATA_SECTION:
2182 return;
2183 case XML_PARSER_COMMENT:
2184 return;
2185 case XML_PARSER_START_TAG:
2186 return;
2187 case XML_PARSER_END_TAG:
2188 return;
2189 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002190 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002191 return;
2192 case XML_PARSER_PROLOG:
2193 case XML_PARSER_START:
2194 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002195 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002196 return;
2197 case XML_PARSER_ENTITY_DECL:
2198 case XML_PARSER_CONTENT:
2199 case XML_PARSER_ATTRIBUTE_VALUE:
2200 case XML_PARSER_PI:
2201 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002202 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002203 /* we just ignore it there */
2204 return;
2205 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002206 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002207 return;
2208 case XML_PARSER_ENTITY_VALUE:
2209 /*
2210 * NOTE: in the case of entity values, we don't do the
2211 * substitution here since we need the literal
2212 * entity value to be able to save the internal
2213 * subset of the document.
2214 * This will be handled by xmlStringDecodeEntities
2215 */
2216 return;
2217 case XML_PARSER_DTD:
2218 /*
2219 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2220 * In the internal DTD subset, parameter-entity references
2221 * can occur only where markup declarations can occur, not
2222 * within markup declarations.
2223 * In that case this is handled in xmlParseMarkupDecl
2224 */
2225 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2226 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002227 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002228 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002229 break;
2230 case XML_PARSER_IGNORE:
2231 return;
2232 }
2233
2234 NEXT;
2235 name = xmlParseName(ctxt);
2236 if (xmlParserDebugEntities)
2237 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002238 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002239 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002241 } else {
2242 if (RAW == ';') {
2243 NEXT;
2244 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2245 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2246 if (entity == NULL) {
2247
2248 /*
2249 * [ WFC: Entity Declared ]
2250 * In a document without any DTD, a document with only an
2251 * internal DTD subset which contains no parameter entity
2252 * references, or a document with "standalone='yes'", ...
2253 * ... The declaration of a parameter entity must precede
2254 * any reference to it...
2255 */
2256 if ((ctxt->standalone == 1) ||
2257 ((ctxt->hasExternalSubset == 0) &&
2258 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002259 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002260 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002261 } else {
2262 /*
2263 * [ VC: Entity Declared ]
2264 * In a document with an external subset or external
2265 * parameter entities with "standalone='no'", ...
2266 * ... The declaration of a parameter entity must precede
2267 * any reference to it...
2268 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002269 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2270 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2271 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002272 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002273 } else
2274 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2275 "PEReference: %%%s; not found\n",
2276 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002277 ctxt->valid = 0;
2278 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002279 } else if (ctxt->input->free != deallocblankswrapper) {
2280 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2281 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002282 } else {
2283 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2284 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002285 xmlChar start[4];
2286 xmlCharEncoding enc;
2287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 /*
2289 * handle the extra spaces added before and after
2290 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002291 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002292 */
2293 input = xmlNewEntityInputStream(ctxt, entity);
2294 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002295
2296 /*
2297 * Get the 4 first bytes and decode the charset
2298 * if enc != XML_CHAR_ENCODING_NONE
2299 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002300 * Note that, since we may have some non-UTF8
2301 * encoding (like UTF16, bug 135229), the 'length'
2302 * is not known, but we can calculate based upon
2303 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002304 */
2305 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002306 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002307 start[0] = RAW;
2308 start[1] = NXT(1);
2309 start[2] = NXT(2);
2310 start[3] = NXT(3);
2311 enc = xmlDetectCharEncoding(start, 4);
2312 if (enc != XML_CHAR_ENCODING_NONE) {
2313 xmlSwitchEncoding(ctxt, enc);
2314 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002315 }
2316
Owen Taylor3473f882001-02-23 17:55:21 +00002317 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002318 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2319 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 xmlParseTextDecl(ctxt);
2321 }
Owen Taylor3473f882001-02-23 17:55:21 +00002322 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002323 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2324 "PEReference: %s is not a parameter entity\n",
2325 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002326 }
2327 }
2328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002329 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002330 }
Owen Taylor3473f882001-02-23 17:55:21 +00002331 }
2332}
2333
/*
 * growBuffer:
 *
 * Double the capacity of the heap buffer @buffer.
 *
 * The macro relies on two things being present at the expansion site:
 *   - an integer variable named <buffer>_size holding the capacity,
 *   - a label named mem_error, jumped to when xmlRealloc() fails.
 * On failure @buffer is left untouched (still allocated), but
 * <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp = NULL;						\
									\
    buffer##_size = buffer##_size * 2;					\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL)							\
        goto mem_error;							\
    buffer = tmp;							\
}
2345
2346/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002347 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002348 * @ctxt: the parser context
2349 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002350 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002351 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2352 * @end: an end marker xmlChar, 0 if none
2353 * @end2: an end marker xmlChar, 0 if none
2354 * @end3: an end marker xmlChar, 0 if none
2355 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002356 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002357 *
2358 * [67] Reference ::= EntityRef | CharRef
2359 *
2360 * [69] PEReference ::= '%' Name ';'
2361 *
2362 * Returns A newly allocated string with the substitution done. The caller
2363 * must deallocate it !
2364 */
2365xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002366xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2367 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002368 xmlChar *buffer = NULL;
2369 int buffer_size = 0;
2370
2371 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002372 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002373 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002374 xmlEntityPtr ent;
2375 int c,l;
2376 int nbchars = 0;
2377
Daniel Veillarda82b1822004-11-08 16:24:57 +00002378 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002379 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002380 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002381
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002382 if ((ctxt->depth > 40) || (ctxt->nbentities >= 500000)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002383 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002384 return(NULL);
2385 }
2386
2387 /*
2388 * allocate a translation buffer.
2389 */
2390 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002391 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002392 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002393
2394 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002395 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002396 * we are operating on already parsed values.
2397 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002398 if (str < last)
2399 c = CUR_SCHAR(str, l);
2400 else
2401 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002402 while ((c != 0) && (c != end) && /* non input consuming loop */
2403 (c != end2) && (c != end3)) {
2404
2405 if (c == 0) break;
2406 if ((c == '&') && (str[1] == '#')) {
2407 int val = xmlParseStringCharRef(ctxt, &str);
2408 if (val != 0) {
2409 COPY_BUF(0,buffer,nbchars,val);
2410 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002411 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2412 growBuffer(buffer);
2413 }
Owen Taylor3473f882001-02-23 17:55:21 +00002414 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2415 if (xmlParserDebugEntities)
2416 xmlGenericError(xmlGenericErrorContext,
2417 "String decoding Entity Reference: %.30s\n",
2418 str);
2419 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002420 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2421 goto int_error;
2422 ctxt->nbentities++;
2423 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002424 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002425 if ((ent != NULL) &&
2426 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2427 if (ent->content != NULL) {
2428 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002429 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2430 growBuffer(buffer);
2431 }
Owen Taylor3473f882001-02-23 17:55:21 +00002432 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002433 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2434 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002435 }
2436 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002437 ctxt->depth++;
2438 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2439 0, 0, 0);
2440 ctxt->depth--;
2441 if (rep != NULL) {
2442 current = rep;
2443 while (*current != 0) { /* non input consuming loop */
2444 buffer[nbchars++] = *current++;
2445 if (nbchars >
2446 buffer_size - XML_PARSER_BUFFER_SIZE) {
2447 growBuffer(buffer);
2448 }
2449 }
2450 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002451 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002452 }
2453 } else if (ent != NULL) {
2454 int i = xmlStrlen(ent->name);
2455 const xmlChar *cur = ent->name;
2456
2457 buffer[nbchars++] = '&';
2458 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2459 growBuffer(buffer);
2460 }
2461 for (;i > 0;i--)
2462 buffer[nbchars++] = *cur++;
2463 buffer[nbchars++] = ';';
2464 }
2465 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2466 if (xmlParserDebugEntities)
2467 xmlGenericError(xmlGenericErrorContext,
2468 "String decoding PE Reference: %.30s\n", str);
2469 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002470 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2471 goto int_error;
2472 ctxt->nbentities++;
2473 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002474 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002475 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002476 if (ent->content == NULL) {
2477 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2478 }
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 ctxt->depth++;
2481 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2482 0, 0, 0);
2483 ctxt->depth--;
2484 if (rep != NULL) {
2485 current = rep;
2486 while (*current != 0) { /* non input consuming loop */
2487 buffer[nbchars++] = *current++;
2488 if (nbchars >
2489 buffer_size - XML_PARSER_BUFFER_SIZE) {
2490 growBuffer(buffer);
2491 }
2492 }
2493 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002494 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002495 }
2496 }
2497 } else {
2498 COPY_BUF(l,buffer,nbchars,c);
2499 str += l;
2500 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2501 growBuffer(buffer);
2502 }
2503 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002504 if (str < last)
2505 c = CUR_SCHAR(str, l);
2506 else
2507 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002508 }
2509 buffer[nbchars++] = 0;
2510 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002511
2512mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002514int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002515 if (rep != NULL)
2516 xmlFree(rep);
2517 if (buffer != NULL)
2518 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002519 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002520}
2521
Daniel Veillarde57ec792003-09-10 10:50:59 +00002522/**
2523 * xmlStringDecodeEntities:
2524 * @ctxt: the parser context
2525 * @str: the input string
2526 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2527 * @end: an end marker xmlChar, 0 if none
2528 * @end2: an end marker xmlChar, 0 if none
2529 * @end3: an end marker xmlChar, 0 if none
2530 *
2531 * Takes a entity string content and process to do the adequate substitutions.
2532 *
2533 * [67] Reference ::= EntityRef | CharRef
2534 *
2535 * [69] PEReference ::= '%' Name ';'
2536 *
2537 * Returns A newly allocated string with the substitution done. The caller
2538 * must deallocate it !
2539 */
2540xmlChar *
2541xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2542 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002543 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002544 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2545 end, end2, end3));
2546}
Owen Taylor3473f882001-02-23 17:55:21 +00002547
2548/************************************************************************
2549 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002550 * Commodity functions, cleanup needed ? *
2551 * *
2552 ************************************************************************/
2553
2554/**
2555 * areBlanks:
2556 * @ctxt: an XML parser context
2557 * @str: a xmlChar *
2558 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002559 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002560 *
2561 * Is this a sequence of blank chars that one can ignore ?
2562 *
2563 * Returns 1 if ignorable 0 otherwise.
2564 */
2565
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002566static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2567 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002568 int i, ret;
2569 xmlNodePtr lastChild;
2570
Daniel Veillard05c13a22001-09-09 08:38:09 +00002571 /*
2572 * Don't spend time trying to differentiate them, the same callback is
2573 * used !
2574 */
2575 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002576 return(0);
2577
Owen Taylor3473f882001-02-23 17:55:21 +00002578 /*
2579 * Check for xml:space value.
2580 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002581 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2582 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002583 return(0);
2584
2585 /*
2586 * Check that the string is made of blanks
2587 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002588 if (blank_chars == 0) {
2589 for (i = 0;i < len;i++)
2590 if (!(IS_BLANK_CH(str[i]))) return(0);
2591 }
Owen Taylor3473f882001-02-23 17:55:21 +00002592
2593 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002594 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002595 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002596 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002597 if (ctxt->myDoc != NULL) {
2598 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2599 if (ret == 0) return(1);
2600 if (ret == 1) return(0);
2601 }
2602
2603 /*
2604 * Otherwise, heuristic :-\
2605 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002606 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002607 if ((ctxt->node->children == NULL) &&
2608 (RAW == '<') && (NXT(1) == '/')) return(0);
2609
2610 lastChild = xmlGetLastChild(ctxt->node);
2611 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002612 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2613 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002614 } else if (xmlNodeIsText(lastChild))
2615 return(0);
2616 else if ((ctxt->node->children != NULL) &&
2617 (xmlNodeIsText(ctxt->node->children)))
2618 return(0);
2619 return(1);
2620}
2621
Owen Taylor3473f882001-02-23 17:55:21 +00002622/************************************************************************
2623 * *
2624 * Extra stuff for namespace support *
2625 * Relates to http://www.w3.org/TR/WD-xml-names *
2626 * *
2627 ************************************************************************/
2628
2629/**
2630 * xmlSplitQName:
2631 * @ctxt: an XML parser context
2632 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002633 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002634 *
2635 * parse an UTF8 encoded XML qualified name string
2636 *
2637 * [NS 5] QName ::= (Prefix ':')? LocalPart
2638 *
2639 * [NS 6] Prefix ::= NCName
2640 *
2641 * [NS 7] LocalPart ::= NCName
2642 *
2643 * Returns the local part, and prefix is updated
2644 * to get the Prefix if any.
2645 */
2646
2647xmlChar *
2648xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2649 xmlChar buf[XML_MAX_NAMELEN + 5];
2650 xmlChar *buffer = NULL;
2651 int len = 0;
2652 int max = XML_MAX_NAMELEN;
2653 xmlChar *ret = NULL;
2654 const xmlChar *cur = name;
2655 int c;
2656
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002657 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002658 *prefix = NULL;
2659
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002660 if (cur == NULL) return(NULL);
2661
Owen Taylor3473f882001-02-23 17:55:21 +00002662#ifndef XML_XML_NAMESPACE
2663 /* xml: prefix is not really a namespace */
2664 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2665 (cur[2] == 'l') && (cur[3] == ':'))
2666 return(xmlStrdup(name));
2667#endif
2668
Daniel Veillard597bc482003-07-24 16:08:28 +00002669 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002670 if (cur[0] == ':')
2671 return(xmlStrdup(name));
2672
2673 c = *cur++;
2674 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2675 buf[len++] = c;
2676 c = *cur++;
2677 }
2678 if (len >= max) {
2679 /*
2680 * Okay someone managed to make a huge name, so he's ready to pay
2681 * for the processing speed.
2682 */
2683 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002684
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002685 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002686 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002687 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002688 return(NULL);
2689 }
2690 memcpy(buffer, buf, len);
2691 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2692 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002693 xmlChar *tmp;
2694
Owen Taylor3473f882001-02-23 17:55:21 +00002695 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002696 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002697 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002698 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002699 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002700 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002701 return(NULL);
2702 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002703 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002704 }
2705 buffer[len++] = c;
2706 c = *cur++;
2707 }
2708 buffer[len] = 0;
2709 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002710
Daniel Veillard597bc482003-07-24 16:08:28 +00002711 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002712 if (buffer != NULL)
2713 xmlFree(buffer);
2714 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002715 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002716 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002717
Owen Taylor3473f882001-02-23 17:55:21 +00002718 if (buffer == NULL)
2719 ret = xmlStrndup(buf, len);
2720 else {
2721 ret = buffer;
2722 buffer = NULL;
2723 max = XML_MAX_NAMELEN;
2724 }
2725
2726
2727 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002728 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002729 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002730 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002731 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002732 }
Owen Taylor3473f882001-02-23 17:55:21 +00002733 len = 0;
2734
Daniel Veillardbb284f42002-10-16 18:02:47 +00002735 /*
2736 * Check that the first character is proper to start
2737 * a new name
2738 */
2739 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2740 ((c >= 0x41) && (c <= 0x5A)) ||
2741 (c == '_') || (c == ':'))) {
2742 int l;
2743 int first = CUR_SCHAR(cur, l);
2744
2745 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002746 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002747 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002748 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002749 }
2750 }
2751 cur++;
2752
Owen Taylor3473f882001-02-23 17:55:21 +00002753 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2754 buf[len++] = c;
2755 c = *cur++;
2756 }
2757 if (len >= max) {
2758 /*
2759 * Okay someone managed to make a huge name, so he's ready to pay
2760 * for the processing speed.
2761 */
2762 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002763
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002764 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002765 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002766 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002767 return(NULL);
2768 }
2769 memcpy(buffer, buf, len);
2770 while (c != 0) { /* tested bigname2.xml */
2771 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002772 xmlChar *tmp;
2773
Owen Taylor3473f882001-02-23 17:55:21 +00002774 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002775 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002776 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002777 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002778 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002779 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002780 return(NULL);
2781 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 }
2784 buffer[len++] = c;
2785 c = *cur++;
2786 }
2787 buffer[len] = 0;
2788 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002789
Owen Taylor3473f882001-02-23 17:55:21 +00002790 if (buffer == NULL)
2791 ret = xmlStrndup(buf, len);
2792 else {
2793 ret = buffer;
2794 }
2795 }
2796
2797 return(ret);
2798}
2799
2800/************************************************************************
2801 * *
2802 * The parser itself *
2803 * Relates to http://www.w3.org/TR/REC-xml *
2804 * *
2805 ************************************************************************/
2806
Daniel Veillard34e3f642008-07-29 09:02:27 +00002807/************************************************************************
2808 * *
2809 * Routines to parse Name, NCName and NmToken *
2810 * *
2811 ************************************************************************/
/*
 * Call counters for the name parsing routines. The first four are each
 * incremented on entry of the routine they are named after;
 * nbParseNmToken and nbParseStringName are presumably incremented by
 * the routines of the same name, defined further down.
 */
unsigned long nbParseName = 0UL;
unsigned long nbParseNameComplex = 0UL;
unsigned long nbParseNCName = 0UL;
unsigned long nbParseNCNameComplex = 0UL;
unsigned long nbParseNmToken = 0UL;
unsigned long nbParseStringName = 0UL;
2818/*
2819 * The two following functions are related to the change of accepted
2820 * characters for Name and NmToken in the Revision 5 of XML-1.0
2821 * They correspond to the modified production [4] and the new production [4a]
2822 * changes in that revision. Also note that the macros used for the
2823 * productions Letter, Digit, CombiningChar and Extender are not needed
2824 * anymore.
2825 * We still keep compatibility to pre-revision5 parsing semantic if the
2826 * new XML_PARSE_OLD10 option is given to the parser.
2827 */
2828static int
2829xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2830 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2831 /*
2832 * Use the new checks of production [4] [4a] amd [5] of the
2833 * Update 5 of XML-1.0
2834 */
2835 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2836 (((c >= 'a') && (c <= 'z')) ||
2837 ((c >= 'A') && (c <= 'Z')) ||
2838 (c == '_') || (c == ':') ||
2839 ((c >= 0xC0) && (c <= 0xD6)) ||
2840 ((c >= 0xD8) && (c <= 0xF6)) ||
2841 ((c >= 0xF8) && (c <= 0x2FF)) ||
2842 ((c >= 0x370) && (c <= 0x37D)) ||
2843 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2844 ((c >= 0x200C) && (c <= 0x200D)) ||
2845 ((c >= 0x2070) && (c <= 0x218F)) ||
2846 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2847 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2848 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2849 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2850 ((c >= 0x10000) && (c <= 0xEFFFF))))
2851 return(1);
2852 } else {
2853 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2854 return(1);
2855 }
2856 return(0);
2857}
2858
2859static int
2860xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2861 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2862 /*
2863 * Use the new checks of production [4] [4a] amd [5] of the
2864 * Update 5 of XML-1.0
2865 */
2866 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2867 (((c >= 'a') && (c <= 'z')) ||
2868 ((c >= 'A') && (c <= 'Z')) ||
2869 ((c >= '0') && (c <= '9')) || /* !start */
2870 (c == '_') || (c == ':') ||
2871 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2872 ((c >= 0xC0) && (c <= 0xD6)) ||
2873 ((c >= 0xD8) && (c <= 0xF6)) ||
2874 ((c >= 0xF8) && (c <= 0x2FF)) ||
2875 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2876 ((c >= 0x370) && (c <= 0x37D)) ||
2877 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2878 ((c >= 0x200C) && (c <= 0x200D)) ||
2879 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2880 ((c >= 0x2070) && (c <= 0x218F)) ||
2881 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2882 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2883 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2884 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2885 ((c >= 0x10000) && (c <= 0xEFFFF))))
2886 return(1);
2887 } else {
2888 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2889 (c == '.') || (c == '-') ||
2890 (c == '_') || (c == ':') ||
2891 (IS_COMBINING(c)) ||
2892 (IS_EXTENDER(c)))
2893 return(1);
2894 }
2895 return(0);
2896}
2897
Daniel Veillarde57ec792003-09-10 10:50:59 +00002898static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002899 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002900
Daniel Veillard34e3f642008-07-29 09:02:27 +00002901static const xmlChar *
2902xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2903 int len = 0, l;
2904 int c;
2905 int count = 0;
2906
2907 nbParseNameComplex++;
2908
2909 /*
2910 * Handler for more complex cases
2911 */
2912 GROW;
2913 c = CUR_CHAR(l);
2914 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2915 /*
2916 * Use the new checks of production [4] [4a] amd [5] of the
2917 * Update 5 of XML-1.0
2918 */
2919 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2920 (!(((c >= 'a') && (c <= 'z')) ||
2921 ((c >= 'A') && (c <= 'Z')) ||
2922 (c == '_') || (c == ':') ||
2923 ((c >= 0xC0) && (c <= 0xD6)) ||
2924 ((c >= 0xD8) && (c <= 0xF6)) ||
2925 ((c >= 0xF8) && (c <= 0x2FF)) ||
2926 ((c >= 0x370) && (c <= 0x37D)) ||
2927 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2928 ((c >= 0x200C) && (c <= 0x200D)) ||
2929 ((c >= 0x2070) && (c <= 0x218F)) ||
2930 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2931 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2932 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2933 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2934 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
2935 return(NULL);
2936 }
2937 len += l;
2938 NEXTL(l);
2939 c = CUR_CHAR(l);
2940 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2941 (((c >= 'a') && (c <= 'z')) ||
2942 ((c >= 'A') && (c <= 'Z')) ||
2943 ((c >= '0') && (c <= '9')) || /* !start */
2944 (c == '_') || (c == ':') ||
2945 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2946 ((c >= 0xC0) && (c <= 0xD6)) ||
2947 ((c >= 0xD8) && (c <= 0xF6)) ||
2948 ((c >= 0xF8) && (c <= 0x2FF)) ||
2949 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2950 ((c >= 0x370) && (c <= 0x37D)) ||
2951 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2952 ((c >= 0x200C) && (c <= 0x200D)) ||
2953 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2954 ((c >= 0x2070) && (c <= 0x218F)) ||
2955 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2956 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2957 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2958 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2959 ((c >= 0x10000) && (c <= 0xEFFFF))
2960 )) {
2961 if (count++ > 100) {
2962 count = 0;
2963 GROW;
2964 }
2965 len += l;
2966 NEXTL(l);
2967 c = CUR_CHAR(l);
2968 }
2969 } else {
2970 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2971 (!IS_LETTER(c) && (c != '_') &&
2972 (c != ':'))) {
2973 return(NULL);
2974 }
2975 len += l;
2976 NEXTL(l);
2977 c = CUR_CHAR(l);
2978
2979 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2980 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2981 (c == '.') || (c == '-') ||
2982 (c == '_') || (c == ':') ||
2983 (IS_COMBINING(c)) ||
2984 (IS_EXTENDER(c)))) {
2985 if (count++ > 100) {
2986 count = 0;
2987 GROW;
2988 }
2989 len += l;
2990 NEXTL(l);
2991 c = CUR_CHAR(l);
2992 }
2993 }
2994 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2995 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2996 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2997}
2998
Owen Taylor3473f882001-02-23 17:55:21 +00002999/**
3000 * xmlParseName:
3001 * @ctxt: an XML parser context
3002 *
3003 * parse an XML name.
3004 *
3005 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3006 * CombiningChar | Extender
3007 *
3008 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3009 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003010 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003011 *
3012 * Returns the Name parsed or NULL
3013 */
3014
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003015const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003016xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003017 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003018 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003019 int count = 0;
3020
3021 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003022
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023 nbParseName++;
3024
Daniel Veillard48b2f892001-02-25 16:11:03 +00003025 /*
3026 * Accelerator for simple ASCII names
3027 */
3028 in = ctxt->input->cur;
3029 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3030 ((*in >= 0x41) && (*in <= 0x5A)) ||
3031 (*in == '_') || (*in == ':')) {
3032 in++;
3033 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3034 ((*in >= 0x41) && (*in <= 0x5A)) ||
3035 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003036 (*in == '_') || (*in == '-') ||
3037 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003038 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003039 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003040 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003041 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003042 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003043 ctxt->nbChars += count;
3044 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003045 if (ret == NULL)
3046 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003047 return(ret);
3048 }
3049 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003050 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003051 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003052}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003053
Daniel Veillard34e3f642008-07-29 09:02:27 +00003054static const xmlChar *
3055xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3056 int len = 0, l;
3057 int c;
3058 int count = 0;
3059
3060 nbParseNCNameComplex++;
3061
3062 /*
3063 * Handler for more complex cases
3064 */
3065 GROW;
3066 c = CUR_CHAR(l);
3067 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3068 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3069 return(NULL);
3070 }
3071
3072 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3073 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3074 if (count++ > 100) {
3075 count = 0;
3076 GROW;
3077 }
3078 len += l;
3079 NEXTL(l);
3080 c = CUR_CHAR(l);
3081 }
3082 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3083}
3084
3085/**
3086 * xmlParseNCName:
3087 * @ctxt: an XML parser context
3088 * @len: lenght of the string parsed
3089 *
3090 * parse an XML name.
3091 *
3092 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3093 * CombiningChar | Extender
3094 *
3095 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3096 *
3097 * Returns the Name parsed or NULL
3098 */
3099
3100static const xmlChar *
3101xmlParseNCName(xmlParserCtxtPtr ctxt) {
3102 const xmlChar *in;
3103 const xmlChar *ret;
3104 int count = 0;
3105
3106 nbParseNCName++;
3107
3108 /*
3109 * Accelerator for simple ASCII names
3110 */
3111 in = ctxt->input->cur;
3112 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3113 ((*in >= 0x41) && (*in <= 0x5A)) ||
3114 (*in == '_')) {
3115 in++;
3116 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3117 ((*in >= 0x41) && (*in <= 0x5A)) ||
3118 ((*in >= 0x30) && (*in <= 0x39)) ||
3119 (*in == '_') || (*in == '-') ||
3120 (*in == '.'))
3121 in++;
3122 if ((*in > 0) && (*in < 0x80)) {
3123 count = in - ctxt->input->cur;
3124 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3125 ctxt->input->cur = in;
3126 ctxt->nbChars += count;
3127 ctxt->input->col += count;
3128 if (ret == NULL) {
3129 xmlErrMemory(ctxt, NULL);
3130 }
3131 return(ret);
3132 }
3133 }
3134 return(xmlParseNCNameComplex(ctxt));
3135}
3136
Daniel Veillard46de64e2002-05-29 08:21:33 +00003137/**
3138 * xmlParseNameAndCompare:
3139 * @ctxt: an XML parser context
3140 *
3141 * parse an XML name and compares for match
3142 * (specialized for endtag parsing)
3143 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003144 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3145 * and the name for mismatch
3146 */
3147
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003148static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003149xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003150 register const xmlChar *cmp = other;
3151 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003152 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003153
3154 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003155
Daniel Veillard46de64e2002-05-29 08:21:33 +00003156 in = ctxt->input->cur;
3157 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003158 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003159 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003160 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003161 }
William M. Brack76e95df2003-10-18 16:20:14 +00003162 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003163 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003164 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003165 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003166 }
3167 /* failure (or end of input buffer), check with full function */
3168 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003169 /* strings coming from the dictionnary direct compare possible */
3170 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003171 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003172 }
3173 return ret;
3174}
3175
Owen Taylor3473f882001-02-23 17:55:21 +00003176/**
3177 * xmlParseStringName:
3178 * @ctxt: an XML parser context
3179 * @str: a pointer to the string pointer (IN/OUT)
3180 *
3181 * parse an XML name.
3182 *
3183 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3184 * CombiningChar | Extender
3185 *
3186 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3187 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003188 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003189 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003190 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003191 * is updated to the current location in the string.
3192 */
3193
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003194static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003195xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3196 xmlChar buf[XML_MAX_NAMELEN + 5];
3197 const xmlChar *cur = *str;
3198 int len = 0, l;
3199 int c;
3200
Daniel Veillard34e3f642008-07-29 09:02:27 +00003201 nbParseStringName++;
3202
Owen Taylor3473f882001-02-23 17:55:21 +00003203 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003204 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003205 return(NULL);
3206 }
3207
Daniel Veillard34e3f642008-07-29 09:02:27 +00003208 COPY_BUF(l,buf,len,c);
3209 cur += l;
3210 c = CUR_SCHAR(cur, l);
3211 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003212 COPY_BUF(l,buf,len,c);
3213 cur += l;
3214 c = CUR_SCHAR(cur, l);
3215 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3216 /*
3217 * Okay someone managed to make a huge name, so he's ready to pay
3218 * for the processing speed.
3219 */
3220 xmlChar *buffer;
3221 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003222
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003223 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003224 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003225 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003226 return(NULL);
3227 }
3228 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003230 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003231 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003232 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003233 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003234 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003235 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003236 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003237 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003238 return(NULL);
3239 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003240 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003241 }
3242 COPY_BUF(l,buffer,len,c);
3243 cur += l;
3244 c = CUR_SCHAR(cur, l);
3245 }
3246 buffer[len] = 0;
3247 *str = cur;
3248 return(buffer);
3249 }
3250 }
3251 *str = cur;
3252 return(xmlStrndup(buf, len));
3253}
3254
3255/**
3256 * xmlParseNmtoken:
3257 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003258 *
Owen Taylor3473f882001-02-23 17:55:21 +00003259 * parse an XML Nmtoken.
3260 *
3261 * [7] Nmtoken ::= (NameChar)+
3262 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003263 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003264 *
3265 * Returns the Nmtoken parsed or NULL
3266 */
3267
3268xmlChar *
3269xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3270 xmlChar buf[XML_MAX_NAMELEN + 5];
3271 int len = 0, l;
3272 int c;
3273 int count = 0;
3274
Daniel Veillard34e3f642008-07-29 09:02:27 +00003275 nbParseNmToken++;
3276
Owen Taylor3473f882001-02-23 17:55:21 +00003277 GROW;
3278 c = CUR_CHAR(l);
3279
Daniel Veillard34e3f642008-07-29 09:02:27 +00003280 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003281 if (count++ > 100) {
3282 count = 0;
3283 GROW;
3284 }
3285 COPY_BUF(l,buf,len,c);
3286 NEXTL(l);
3287 c = CUR_CHAR(l);
3288 if (len >= XML_MAX_NAMELEN) {
3289 /*
3290 * Okay someone managed to make a huge token, so he's ready to pay
3291 * for the processing speed.
3292 */
3293 xmlChar *buffer;
3294 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003296 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003297 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003299 return(NULL);
3300 }
3301 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003302 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003303 if (count++ > 100) {
3304 count = 0;
3305 GROW;
3306 }
3307 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003308 xmlChar *tmp;
3309
Owen Taylor3473f882001-02-23 17:55:21 +00003310 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003311 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003312 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003313 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003314 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003315 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003316 return(NULL);
3317 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003318 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003319 }
3320 COPY_BUF(l,buffer,len,c);
3321 NEXTL(l);
3322 c = CUR_CHAR(l);
3323 }
3324 buffer[len] = 0;
3325 return(buffer);
3326 }
3327 }
3328 if (len == 0)
3329 return(NULL);
3330 return(xmlStrndup(buf, len));
3331}
3332
3333/**
3334 * xmlParseEntityValue:
3335 * @ctxt: an XML parser context
3336 * @orig: if non-NULL store a copy of the original entity value
3337 *
3338 * parse a value for ENTITY declarations
3339 *
3340 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3341 * "'" ([^%&'] | PEReference | Reference)* "'"
3342 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003343 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003344 */
3345
3346xmlChar *
3347xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3348 xmlChar *buf = NULL;
3349 int len = 0;
3350 int size = XML_PARSER_BUFFER_SIZE;
3351 int c, l;
3352 xmlChar stop;
3353 xmlChar *ret = NULL;
3354 const xmlChar *cur = NULL;
3355 xmlParserInputPtr input;
3356
3357 if (RAW == '"') stop = '"';
3358 else if (RAW == '\'') stop = '\'';
3359 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003360 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003361 return(NULL);
3362 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003363 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003364 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003365 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003366 return(NULL);
3367 }
3368
3369 /*
3370 * The content of the entity definition is copied in a buffer.
3371 */
3372
3373 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3374 input = ctxt->input;
3375 GROW;
3376 NEXT;
3377 c = CUR_CHAR(l);
3378 /*
3379 * NOTE: 4.4.5 Included in Literal
3380 * When a parameter entity reference appears in a literal entity
3381 * value, ... a single or double quote character in the replacement
3382 * text is always treated as a normal data character and will not
3383 * terminate the literal.
3384 * In practice it means we stop the loop only when back at parsing
3385 * the initial entity and the quote is found
3386 */
William M. Brack871611b2003-10-18 04:53:14 +00003387 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003388 (ctxt->input != input))) {
3389 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003390 xmlChar *tmp;
3391
Owen Taylor3473f882001-02-23 17:55:21 +00003392 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003393 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3394 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003395 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003396 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003397 return(NULL);
3398 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003399 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003400 }
3401 COPY_BUF(l,buf,len,c);
3402 NEXTL(l);
3403 /*
3404 * Pop-up of finished entities.
3405 */
3406 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3407 xmlPopInput(ctxt);
3408
3409 GROW;
3410 c = CUR_CHAR(l);
3411 if (c == 0) {
3412 GROW;
3413 c = CUR_CHAR(l);
3414 }
3415 }
3416 buf[len] = 0;
3417
3418 /*
3419 * Raise problem w.r.t. '&' and '%' being used in non-entities
3420 * reference constructs. Note Charref will be handled in
3421 * xmlStringDecodeEntities()
3422 */
3423 cur = buf;
3424 while (*cur != 0) { /* non input consuming */
3425 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3426 xmlChar *name;
3427 xmlChar tmp = *cur;
3428
3429 cur++;
3430 name = xmlParseStringName(ctxt, &cur);
3431 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003432 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003433 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003434 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003435 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003436 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3437 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003438 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003439 }
3440 if (name != NULL)
3441 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003442 if (*cur == 0)
3443 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 cur++;
3446 }
3447
3448 /*
3449 * Then PEReference entities are substituted.
3450 */
3451 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003452 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003453 xmlFree(buf);
3454 } else {
3455 NEXT;
3456 /*
3457 * NOTE: 4.4.7 Bypassed
3458 * When a general entity reference appears in the EntityValue in
3459 * an entity declaration, it is bypassed and left as is.
3460 * so XML_SUBSTITUTE_REF is not set here.
3461 */
3462 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3463 0, 0, 0);
3464 if (orig != NULL)
3465 *orig = buf;
3466 else
3467 xmlFree(buf);
3468 }
3469
3470 return(ret);
3471}
3472
3473/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003474 * xmlParseAttValueComplex:
3475 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003476 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003477 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003478 *
3479 * parse a value for an attribute, this is the fallback function
3480 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003481 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003482 *
3483 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3484 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003485static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003486xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003487 xmlChar limit = 0;
3488 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003489 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003490 int len = 0;
3491 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003492 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003493 xmlChar *current = NULL;
3494 xmlEntityPtr ent;
3495
Owen Taylor3473f882001-02-23 17:55:21 +00003496 if (NXT(0) == '"') {
3497 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3498 limit = '"';
3499 NEXT;
3500 } else if (NXT(0) == '\'') {
3501 limit = '\'';
3502 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3503 NEXT;
3504 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003505 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003506 return(NULL);
3507 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003508
Owen Taylor3473f882001-02-23 17:55:21 +00003509 /*
3510 * allocate a translation buffer.
3511 */
3512 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003513 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003514 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003515
3516 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003517 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003518 */
3519 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003520 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003521 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003522 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003523 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003524 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003525 if (NXT(1) == '#') {
3526 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003527
Owen Taylor3473f882001-02-23 17:55:21 +00003528 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003529 if (ctxt->replaceEntities) {
3530 if (len > buf_size - 10) {
3531 growBuffer(buf);
3532 }
3533 buf[len++] = '&';
3534 } else {
3535 /*
3536 * The reparsing will be done in xmlStringGetNodeList()
3537 * called by the attribute() function in SAX.c
3538 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003539 if (len > buf_size - 10) {
3540 growBuffer(buf);
3541 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003542 buf[len++] = '&';
3543 buf[len++] = '#';
3544 buf[len++] = '3';
3545 buf[len++] = '8';
3546 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003547 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003548 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003549 if (len > buf_size - 10) {
3550 growBuffer(buf);
3551 }
Owen Taylor3473f882001-02-23 17:55:21 +00003552 len += xmlCopyChar(0, &buf[len], val);
3553 }
3554 } else {
3555 ent = xmlParseEntityRef(ctxt);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00003556 ctxt->nbentities++;
3557 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00003558 ctxt->nbentities += ent->checked;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003559 if ((ent != NULL) &&
3560 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3561 if (len > buf_size - 10) {
3562 growBuffer(buf);
3563 }
3564 if ((ctxt->replaceEntities == 0) &&
3565 (ent->content[0] == '&')) {
3566 buf[len++] = '&';
3567 buf[len++] = '#';
3568 buf[len++] = '3';
3569 buf[len++] = '8';
3570 buf[len++] = ';';
3571 } else {
3572 buf[len++] = ent->content[0];
3573 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003574 } else if ((ent != NULL) &&
3575 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003576 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3577 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003578 XML_SUBSTITUTE_REF,
3579 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003580 if (rep != NULL) {
3581 current = rep;
3582 while (*current != 0) { /* non input consuming */
3583 buf[len++] = *current++;
3584 if (len > buf_size - 10) {
3585 growBuffer(buf);
3586 }
3587 }
3588 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003589 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003590 }
3591 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003592 if (len > buf_size - 10) {
3593 growBuffer(buf);
3594 }
Owen Taylor3473f882001-02-23 17:55:21 +00003595 if (ent->content != NULL)
3596 buf[len++] = ent->content[0];
3597 }
3598 } else if (ent != NULL) {
3599 int i = xmlStrlen(ent->name);
3600 const xmlChar *cur = ent->name;
3601
3602 /*
3603 * This may look absurd but is needed to detect
3604 * entities problems
3605 */
3606 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3607 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003608 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003609 XML_SUBSTITUTE_REF, 0, 0, 0);
3610 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003611 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003612 rep = NULL;
3613 }
Owen Taylor3473f882001-02-23 17:55:21 +00003614 }
3615
3616 /*
3617 * Just output the reference
3618 */
3619 buf[len++] = '&';
3620 if (len > buf_size - i - 10) {
3621 growBuffer(buf);
3622 }
3623 for (;i > 0;i--)
3624 buf[len++] = *cur++;
3625 buf[len++] = ';';
3626 }
3627 }
3628 } else {
3629 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003630 if ((len != 0) || (!normalize)) {
3631 if ((!normalize) || (!in_space)) {
3632 COPY_BUF(l,buf,len,0x20);
3633 if (len > buf_size - 10) {
3634 growBuffer(buf);
3635 }
3636 }
3637 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003638 }
3639 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003640 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003641 COPY_BUF(l,buf,len,c);
3642 if (len > buf_size - 10) {
3643 growBuffer(buf);
3644 }
3645 }
3646 NEXTL(l);
3647 }
3648 GROW;
3649 c = CUR_CHAR(l);
3650 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003651 if ((in_space) && (normalize)) {
3652 while (buf[len - 1] == 0x20) len--;
3653 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003654 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003655 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003656 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003657 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003658 if ((c != 0) && (!IS_CHAR(c))) {
3659 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3660 "invalid character in attribute value\n");
3661 } else {
3662 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3663 "AttValue: ' expected\n");
3664 }
Owen Taylor3473f882001-02-23 17:55:21 +00003665 } else
3666 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003667 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003668 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003669
3670mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003671 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003672 if (buf != NULL)
3673 xmlFree(buf);
3674 if (rep != NULL)
3675 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003676 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003677}
3678
3679/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003680 * xmlParseAttValue:
3681 * @ctxt: an XML parser context
3682 *
3683 * parse a value for an attribute
3684 * Note: the parser won't do substitution of entities here, this
3685 * will be handled later in xmlStringGetNodeList
3686 *
3687 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3688 * "'" ([^<&'] | Reference)* "'"
3689 *
3690 * 3.3.3 Attribute-Value Normalization:
3691 * Before the value of an attribute is passed to the application or
3692 * checked for validity, the XML processor must normalize it as follows:
3693 * - a character reference is processed by appending the referenced
3694 * character to the attribute value
3695 * - an entity reference is processed by recursively processing the
3696 * replacement text of the entity
3697 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3698 * appending #x20 to the normalized value, except that only a single
3699 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3700 * parsed entity or the literal entity value of an internal parsed entity
3701 * - other characters are processed by appending them to the normalized value
3702 * If the declared value is not CDATA, then the XML processor must further
3703 * process the normalized attribute value by discarding any leading and
3704 * trailing space (#x20) characters, and by replacing sequences of space
3705 * (#x20) characters by a single space (#x20) character.
3706 * All attributes for which no declaration has been read should be treated
3707 * by a non-validating parser as if declared CDATA.
3708 *
3709 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3710 */
3711
3712
3713xmlChar *
3714xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003715 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003716 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003717}
3718
3719/**
Owen Taylor3473f882001-02-23 17:55:21 +00003720 * xmlParseSystemLiteral:
3721 * @ctxt: an XML parser context
3722 *
3723 * parse an XML Literal
3724 *
3725 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3726 *
3727 * Returns the SystemLiteral parsed or NULL
3728 */
3729
3730xmlChar *
3731xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3732 xmlChar *buf = NULL;
3733 int len = 0;
3734 int size = XML_PARSER_BUFFER_SIZE;
3735 int cur, l;
3736 xmlChar stop;
3737 int state = ctxt->instate;
3738 int count = 0;
3739
3740 SHRINK;
3741 if (RAW == '"') {
3742 NEXT;
3743 stop = '"';
3744 } else if (RAW == '\'') {
3745 NEXT;
3746 stop = '\'';
3747 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003748 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003749 return(NULL);
3750 }
3751
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003752 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003753 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003754 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 return(NULL);
3756 }
3757 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3758 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003759 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003760 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003761 xmlChar *tmp;
3762
Owen Taylor3473f882001-02-23 17:55:21 +00003763 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003764 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3765 if (tmp == NULL) {
3766 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003767 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003768 ctxt->instate = (xmlParserInputState) state;
3769 return(NULL);
3770 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003771 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003772 }
3773 count++;
3774 if (count > 50) {
3775 GROW;
3776 count = 0;
3777 }
3778 COPY_BUF(l,buf,len,cur);
3779 NEXTL(l);
3780 cur = CUR_CHAR(l);
3781 if (cur == 0) {
3782 GROW;
3783 SHRINK;
3784 cur = CUR_CHAR(l);
3785 }
3786 }
3787 buf[len] = 0;
3788 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003789 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003790 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003791 } else {
3792 NEXT;
3793 }
3794 return(buf);
3795}
3796
3797/**
3798 * xmlParsePubidLiteral:
3799 * @ctxt: an XML parser context
3800 *
3801 * parse an XML public literal
3802 *
3803 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3804 *
3805 * Returns the PubidLiteral parsed or NULL.
3806 */
3807
3808xmlChar *
3809xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3810 xmlChar *buf = NULL;
3811 int len = 0;
3812 int size = XML_PARSER_BUFFER_SIZE;
3813 xmlChar cur;
3814 xmlChar stop;
3815 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003816 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003817
3818 SHRINK;
3819 if (RAW == '"') {
3820 NEXT;
3821 stop = '"';
3822 } else if (RAW == '\'') {
3823 NEXT;
3824 stop = '\'';
3825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003826 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003827 return(NULL);
3828 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003829 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003830 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003831 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003832 return(NULL);
3833 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003834 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003836 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003837 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003838 xmlChar *tmp;
3839
Owen Taylor3473f882001-02-23 17:55:21 +00003840 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003841 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3842 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003843 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003844 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003845 return(NULL);
3846 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003847 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003848 }
3849 buf[len++] = cur;
3850 count++;
3851 if (count > 50) {
3852 GROW;
3853 count = 0;
3854 }
3855 NEXT;
3856 cur = CUR;
3857 if (cur == 0) {
3858 GROW;
3859 SHRINK;
3860 cur = CUR;
3861 }
3862 }
3863 buf[len] = 0;
3864 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003865 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003866 } else {
3867 NEXT;
3868 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003869 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003870 return(buf);
3871}
3872
Daniel Veillard48b2f892001-02-25 16:11:03 +00003873void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003874
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. Entry i is either i itself or 0. The non-zero entries are
 * TAB (0x09) and the ASCII range 0x20-0x7F, minus '&' (0x26),
 * '<' (0x3C) and ']' (0x5D). Every other byte maps to 0: the remaining
 * control characters, including LF (0x0A) and CR (0x0D), and all
 * non-ASCII bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero initialisation */
};
3912
Owen Taylor3473f882001-02-23 17:55:21 +00003913/**
3914 * xmlParseCharData:
3915 * @ctxt: an XML parser context
3916 * @cdata: int indicating whether we are within a CDATA section
3917 *
3918 * parse a CharData section.
3919 * if we are within a CDATA section ']]>' marks an end of section.
3920 *
3921 * The right angle bracket (>) may be represented using the string "&gt;",
3922 * and must, for compatibility, be escaped using "&gt;" or a character
3923 * reference when it appears in the string "]]>" in content, when that
3924 * string is not marking the end of a CDATA section.
3925 *
3926 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3927 */
3928
3929void
3930xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003931 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003932 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003933 int line = ctxt->input->line;
3934 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003935 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003936
3937 SHRINK;
3938 GROW;
3939 /*
3940 * Accelerated common case where input don't need to be
3941 * modified before passing it to the handler.
3942 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003943 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003944 in = ctxt->input->cur;
3945 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003946get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003947 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003948 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003949 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003950 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003951 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003952 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003953 goto get_more_space;
3954 }
3955 if (*in == '<') {
3956 nbchar = in - ctxt->input->cur;
3957 if (nbchar > 0) {
3958 const xmlChar *tmp = ctxt->input->cur;
3959 ctxt->input->cur = in;
3960
Daniel Veillard34099b42004-11-04 17:34:35 +00003961 if ((ctxt->sax != NULL) &&
3962 (ctxt->sax->ignorableWhitespace !=
3963 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003964 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003965 if (ctxt->sax->ignorableWhitespace != NULL)
3966 ctxt->sax->ignorableWhitespace(ctxt->userData,
3967 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003968 } else {
3969 if (ctxt->sax->characters != NULL)
3970 ctxt->sax->characters(ctxt->userData,
3971 tmp, nbchar);
3972 if (*ctxt->space == -1)
3973 *ctxt->space = -2;
3974 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003975 } else if ((ctxt->sax != NULL) &&
3976 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003977 ctxt->sax->characters(ctxt->userData,
3978 tmp, nbchar);
3979 }
3980 }
3981 return;
3982 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003983
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003984get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003985 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003986 while (test_char_data[*in]) {
3987 in++;
3988 ccol++;
3989 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003990 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003991 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003992 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003993 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003994 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003995 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003996 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003997 }
3998 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003999 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004000 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004001 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004002 return;
4003 }
4004 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004005 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004006 goto get_more;
4007 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004008 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004009 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004010 if ((ctxt->sax != NULL) &&
4011 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004012 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004013 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004014 const xmlChar *tmp = ctxt->input->cur;
4015 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004016
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004017 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004018 if (ctxt->sax->ignorableWhitespace != NULL)
4019 ctxt->sax->ignorableWhitespace(ctxt->userData,
4020 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004021 } else {
4022 if (ctxt->sax->characters != NULL)
4023 ctxt->sax->characters(ctxt->userData,
4024 tmp, nbchar);
4025 if (*ctxt->space == -1)
4026 *ctxt->space = -2;
4027 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004028 line = ctxt->input->line;
4029 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004030 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004031 if (ctxt->sax->characters != NULL)
4032 ctxt->sax->characters(ctxt->userData,
4033 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004034 line = ctxt->input->line;
4035 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004036 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004037 }
4038 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004039 if (*in == 0xD) {
4040 in++;
4041 if (*in == 0xA) {
4042 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004043 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004044 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004045 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004046 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004047 in--;
4048 }
4049 if (*in == '<') {
4050 return;
4051 }
4052 if (*in == '&') {
4053 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004054 }
4055 SHRINK;
4056 GROW;
4057 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004058 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004059 nbchar = 0;
4060 }
Daniel Veillard50582112001-03-26 22:52:16 +00004061 ctxt->input->line = line;
4062 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004063 xmlParseCharDataComplex(ctxt, cdata);
4064}
4065
Daniel Veillard01c13b52002-12-10 15:19:08 +00004066/**
4067 * xmlParseCharDataComplex:
4068 * @ctxt: an XML parser context
4069 * @cdata: int indicating whether we are within a CDATA section
4070 *
4071 * parse a CharData section.this is the fallback function
4072 * of xmlParseCharData() when the parsing requires handling
4073 * of non-ASCII characters.
4074 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004075void
4076xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004077 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4078 int nbchar = 0;
4079 int cur, l;
4080 int count = 0;
4081
4082 SHRINK;
4083 GROW;
4084 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004085 while ((cur != '<') && /* checked */
4086 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004087 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004088 if ((cur == ']') && (NXT(1) == ']') &&
4089 (NXT(2) == '>')) {
4090 if (cdata) break;
4091 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004092 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004093 }
4094 }
4095 COPY_BUF(l,buf,nbchar,cur);
4096 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004097 buf[nbchar] = 0;
4098
Owen Taylor3473f882001-02-23 17:55:21 +00004099 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004100 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004101 */
4102 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004103 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004104 if (ctxt->sax->ignorableWhitespace != NULL)
4105 ctxt->sax->ignorableWhitespace(ctxt->userData,
4106 buf, nbchar);
4107 } else {
4108 if (ctxt->sax->characters != NULL)
4109 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004110 if ((ctxt->sax->characters !=
4111 ctxt->sax->ignorableWhitespace) &&
4112 (*ctxt->space == -1))
4113 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004114 }
4115 }
4116 nbchar = 0;
4117 }
4118 count++;
4119 if (count > 50) {
4120 GROW;
4121 count = 0;
4122 }
4123 NEXTL(l);
4124 cur = CUR_CHAR(l);
4125 }
4126 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004127 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004128 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004129 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004130 */
4131 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004132 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004133 if (ctxt->sax->ignorableWhitespace != NULL)
4134 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4135 } else {
4136 if (ctxt->sax->characters != NULL)
4137 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004138 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4139 (*ctxt->space == -1))
4140 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004141 }
4142 }
4143 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004144 if ((cur != 0) && (!IS_CHAR(cur))) {
4145 /* Generate the error and skip the offending character */
4146 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4147 "PCDATA invalid Char value %d\n",
4148 cur);
4149 NEXTL(l);
4150 }
Owen Taylor3473f882001-02-23 17:55:21 +00004151}
4152
4153/**
4154 * xmlParseExternalID:
4155 * @ctxt: an XML parser context
4156 * @publicID: a xmlChar** receiving PubidLiteral
4157 * @strict: indicate whether we should restrict parsing to only
4158 * production [75], see NOTE below
4159 *
4160 * Parse an External ID or a Public ID
4161 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004162 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004163 * 'PUBLIC' S PubidLiteral S SystemLiteral
4164 *
4165 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4166 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4167 *
4168 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4169 *
4170 * Returns the function returns SystemLiteral and in the second
4171 * case publicID receives PubidLiteral, is strict is off
4172 * it is possible to return NULL and have publicID set.
4173 */
4174
4175xmlChar *
4176xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4177 xmlChar *URI = NULL;
4178
4179 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004180
4181 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004182 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004183 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004184 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004185 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4186 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004187 }
4188 SKIP_BLANKS;
4189 URI = xmlParseSystemLiteral(ctxt);
4190 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004191 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004193 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004194 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004195 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004196 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004197 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004198 }
4199 SKIP_BLANKS;
4200 *publicID = xmlParsePubidLiteral(ctxt);
4201 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004202 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004203 }
4204 if (strict) {
4205 /*
4206 * We don't handle [83] so "S SystemLiteral" is required.
4207 */
William M. Brack76e95df2003-10-18 16:20:14 +00004208 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004209 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004210 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004211 }
4212 } else {
4213 /*
4214 * We handle [83] so we return immediately, if
4215 * "S SystemLiteral" is not detected. From a purely parsing
4216 * point of view that's a nice mess.
4217 */
4218 const xmlChar *ptr;
4219 GROW;
4220
4221 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004222 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004223
William M. Brack76e95df2003-10-18 16:20:14 +00004224 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004225 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4226 }
4227 SKIP_BLANKS;
4228 URI = xmlParseSystemLiteral(ctxt);
4229 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004230 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004231 }
4232 }
4233 return(URI);
4234}
4235
4236/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004237 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004238 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004239 * @buf: the already parsed part of the buffer
4240 * @len: number of bytes filles in the buffer
4241 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004242 *
4243 * Skip an XML (SGML) comment <!-- .... -->
4244 * The spec says that "For compatibility, the string "--" (double-hyphen)
4245 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004246 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004247 *
4248 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4249 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004250static void
4251xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004252 int q, ql;
4253 int r, rl;
4254 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004255 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004256 int inputid;
4257
4258 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004259
Owen Taylor3473f882001-02-23 17:55:21 +00004260 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004261 len = 0;
4262 size = XML_PARSER_BUFFER_SIZE;
4263 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4264 if (buf == NULL) {
4265 xmlErrMemory(ctxt, NULL);
4266 return;
4267 }
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004269 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004270 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004271 if (q == 0)
4272 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004273 if (!IS_CHAR(q)) {
4274 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4275 "xmlParseComment: invalid xmlChar value %d\n",
4276 q);
4277 xmlFree (buf);
4278 return;
4279 }
Owen Taylor3473f882001-02-23 17:55:21 +00004280 NEXTL(ql);
4281 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004282 if (r == 0)
4283 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004284 if (!IS_CHAR(r)) {
4285 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4286 "xmlParseComment: invalid xmlChar value %d\n",
4287 q);
4288 xmlFree (buf);
4289 return;
4290 }
Owen Taylor3473f882001-02-23 17:55:21 +00004291 NEXTL(rl);
4292 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004293 if (cur == 0)
4294 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004295 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004296 ((cur != '>') ||
4297 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004298 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004299 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004300 }
4301 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004302 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004303 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004304 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4305 if (new_buf == NULL) {
4306 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004307 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004308 return;
4309 }
William M. Bracka3215c72004-07-31 16:24:01 +00004310 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004311 }
4312 COPY_BUF(ql,buf,len,q);
4313 q = r;
4314 ql = rl;
4315 r = cur;
4316 rl = l;
4317
4318 count++;
4319 if (count > 50) {
4320 GROW;
4321 count = 0;
4322 }
4323 NEXTL(l);
4324 cur = CUR_CHAR(l);
4325 if (cur == 0) {
4326 SHRINK;
4327 GROW;
4328 cur = CUR_CHAR(l);
4329 }
4330 }
4331 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004332 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004333 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004334 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004335 } else if (!IS_CHAR(cur)) {
4336 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4337 "xmlParseComment: invalid xmlChar value %d\n",
4338 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004339 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004340 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004341 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4342 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
4344 NEXT;
4345 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4346 (!ctxt->disableSAX))
4347 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
Daniel Veillardda629342007-08-01 07:49:06 +00004349 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004350 return;
4351not_terminated:
4352 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4353 "Comment not terminated\n", NULL);
4354 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004355 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004356}
Daniel Veillardda629342007-08-01 07:49:06 +00004357
Daniel Veillard4c778d82005-01-23 17:37:44 +00004358/**
4359 * xmlParseComment:
4360 * @ctxt: an XML parser context
4361 *
4362 * Skip an XML (SGML) comment <!-- .... -->
4363 * The spec says that "For compatibility, the string "--" (double-hyphen)
4364 * must not occur within comments. "
4365 *
4366 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4367 */
4368void
4369xmlParseComment(xmlParserCtxtPtr ctxt) {
4370 xmlChar *buf = NULL;
4371 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004372 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004373 xmlParserInputState state;
4374 const xmlChar *in;
4375 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004376 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004377
4378 /*
4379 * Check that there is a comment right here.
4380 */
4381 if ((RAW != '<') || (NXT(1) != '!') ||
4382 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004383 state = ctxt->instate;
4384 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004385 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004386 SKIP(4);
4387 SHRINK;
4388 GROW;
4389
4390 /*
4391 * Accelerated common case where input don't need to be
4392 * modified before passing it to the handler.
4393 */
4394 in = ctxt->input->cur;
4395 do {
4396 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004397 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004398 ctxt->input->line++; ctxt->input->col = 1;
4399 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004400 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004401 }
4402get_more:
4403 ccol = ctxt->input->col;
4404 while (((*in > '-') && (*in <= 0x7F)) ||
4405 ((*in >= 0x20) && (*in < '-')) ||
4406 (*in == 0x09)) {
4407 in++;
4408 ccol++;
4409 }
4410 ctxt->input->col = ccol;
4411 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004412 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004413 ctxt->input->line++; ctxt->input->col = 1;
4414 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004415 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004416 goto get_more;
4417 }
4418 nbchar = in - ctxt->input->cur;
4419 /*
4420 * save current set of data
4421 */
4422 if (nbchar > 0) {
4423 if ((ctxt->sax != NULL) &&
4424 (ctxt->sax->comment != NULL)) {
4425 if (buf == NULL) {
4426 if ((*in == '-') && (in[1] == '-'))
4427 size = nbchar + 1;
4428 else
4429 size = XML_PARSER_BUFFER_SIZE + nbchar;
4430 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4431 if (buf == NULL) {
4432 xmlErrMemory(ctxt, NULL);
4433 ctxt->instate = state;
4434 return;
4435 }
4436 len = 0;
4437 } else if (len + nbchar + 1 >= size) {
4438 xmlChar *new_buf;
4439 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4440 new_buf = (xmlChar *) xmlRealloc(buf,
4441 size * sizeof(xmlChar));
4442 if (new_buf == NULL) {
4443 xmlFree (buf);
4444 xmlErrMemory(ctxt, NULL);
4445 ctxt->instate = state;
4446 return;
4447 }
4448 buf = new_buf;
4449 }
4450 memcpy(&buf[len], ctxt->input->cur, nbchar);
4451 len += nbchar;
4452 buf[len] = 0;
4453 }
4454 }
4455 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004456 if (*in == 0xA) {
4457 in++;
4458 ctxt->input->line++; ctxt->input->col = 1;
4459 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004460 if (*in == 0xD) {
4461 in++;
4462 if (*in == 0xA) {
4463 ctxt->input->cur = in;
4464 in++;
4465 ctxt->input->line++; ctxt->input->col = 1;
4466 continue; /* while */
4467 }
4468 in--;
4469 }
4470 SHRINK;
4471 GROW;
4472 in = ctxt->input->cur;
4473 if (*in == '-') {
4474 if (in[1] == '-') {
4475 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004476 if (ctxt->input->id != inputid) {
4477 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4478 "comment doesn't start and stop in the same entity\n");
4479 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004480 SKIP(3);
4481 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4482 (!ctxt->disableSAX)) {
4483 if (buf != NULL)
4484 ctxt->sax->comment(ctxt->userData, buf);
4485 else
4486 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4487 }
4488 if (buf != NULL)
4489 xmlFree(buf);
4490 ctxt->instate = state;
4491 return;
4492 }
4493 if (buf != NULL)
4494 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4495 "Comment not terminated \n<!--%.50s\n",
4496 buf);
4497 else
4498 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4499 "Comment not terminated \n", NULL);
4500 in++;
4501 ctxt->input->col++;
4502 }
4503 in++;
4504 ctxt->input->col++;
4505 goto get_more;
4506 }
4507 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4508 xmlParseCommentComplex(ctxt, buf, len, size);
4509 ctxt->instate = state;
4510 return;
4511}
4512
Owen Taylor3473f882001-02-23 17:55:21 +00004513
4514/**
4515 * xmlParsePITarget:
4516 * @ctxt: an XML parser context
4517 *
4518 * parse the name of a PI
4519 *
4520 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4521 *
4522 * Returns the PITarget name or NULL
4523 */
4524
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004525const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004526xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004527 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004528
4529 name = xmlParseName(ctxt);
4530 if ((name != NULL) &&
4531 ((name[0] == 'x') || (name[0] == 'X')) &&
4532 ((name[1] == 'm') || (name[1] == 'M')) &&
4533 ((name[2] == 'l') || (name[2] == 'L'))) {
4534 int i;
4535 if ((name[0] == 'x') && (name[1] == 'm') &&
4536 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004537 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004538 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004539 return(name);
4540 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004541 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004542 return(name);
4543 }
4544 for (i = 0;;i++) {
4545 if (xmlW3CPIs[i] == NULL) break;
4546 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4547 return(name);
4548 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004549 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4550 "xmlParsePITarget: invalid name prefix 'xml'\n",
4551 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 }
Daniel Veillard37334572008-07-31 08:20:02 +00004553 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4554 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4555 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4556 }
Owen Taylor3473f882001-02-23 17:55:21 +00004557 return(name);
4558}
4559
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect: S? 'catalog' S? '=' S? quoted-URL S? */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        /* A missing '=' is dropped silently, without any warning */
        return;
    }
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;
    /* Only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4621
Owen Taylor3473f882001-02-23 17:55:21 +00004622/**
4623 * xmlParsePI:
4624 * @ctxt: an XML parser context
4625 *
4626 * parse an XML Processing Instruction.
4627 *
4628 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4629 *
4630 * The processing is transfered to SAX once parsed.
4631 */
4632
4633void
4634xmlParsePI(xmlParserCtxtPtr ctxt) {
4635 xmlChar *buf = NULL;
4636 int len = 0;
4637 int size = XML_PARSER_BUFFER_SIZE;
4638 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004639 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004640 xmlParserInputState state;
4641 int count = 0;
4642
4643 if ((RAW == '<') && (NXT(1) == '?')) {
4644 xmlParserInputPtr input = ctxt->input;
4645 state = ctxt->instate;
4646 ctxt->instate = XML_PARSER_PI;
4647 /*
4648 * this is a Processing Instruction.
4649 */
4650 SKIP(2);
4651 SHRINK;
4652
4653 /*
4654 * Parse the target name and check for special support like
4655 * namespace.
4656 */
4657 target = xmlParsePITarget(ctxt);
4658 if (target != NULL) {
4659 if ((RAW == '?') && (NXT(1) == '>')) {
4660 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004661 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4662 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004663 }
4664 SKIP(2);
4665
4666 /*
4667 * SAX: PI detected.
4668 */
4669 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4670 (ctxt->sax->processingInstruction != NULL))
4671 ctxt->sax->processingInstruction(ctxt->userData,
4672 target, NULL);
4673 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 return;
4675 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004676 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004677 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004678 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004679 ctxt->instate = state;
4680 return;
4681 }
4682 cur = CUR;
4683 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004684 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4685 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004686 }
4687 SKIP_BLANKS;
4688 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004689 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004690 ((cur != '?') || (NXT(1) != '>'))) {
4691 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004692 xmlChar *tmp;
4693
Owen Taylor3473f882001-02-23 17:55:21 +00004694 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004695 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4696 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004697 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004698 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004699 ctxt->instate = state;
4700 return;
4701 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004702 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
4704 count++;
4705 if (count > 50) {
4706 GROW;
4707 count = 0;
4708 }
4709 COPY_BUF(l,buf,len,cur);
4710 NEXTL(l);
4711 cur = CUR_CHAR(l);
4712 if (cur == 0) {
4713 SHRINK;
4714 GROW;
4715 cur = CUR_CHAR(l);
4716 }
4717 }
4718 buf[len] = 0;
4719 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004720 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4721 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004722 } else {
4723 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4725 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004726 }
4727 SKIP(2);
4728
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004729#ifdef LIBXML_CATALOG_ENABLED
4730 if (((state == XML_PARSER_MISC) ||
4731 (state == XML_PARSER_START)) &&
4732 (xmlStrEqual(target, XML_CATALOG_PI))) {
4733 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4734 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4735 (allow == XML_CATA_ALLOW_ALL))
4736 xmlParseCatalogPI(ctxt, buf);
4737 }
4738#endif
4739
4740
Owen Taylor3473f882001-02-23 17:55:21 +00004741 /*
4742 * SAX: PI detected.
4743 */
4744 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4745 (ctxt->sax->processingInstruction != NULL))
4746 ctxt->sax->processingInstruction(ctxt->userData,
4747 target, buf);
4748 }
4749 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004750 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004751 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004752 }
4753 ctxt->instate = state;
4754 }
4755}
4756
4757/**
4758 * xmlParseNotationDecl:
4759 * @ctxt: an XML parser context
4760 *
4761 * parse a notation declaration
4762 *
4763 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4764 *
4765 * Hence there is actually 3 choices:
4766 * 'PUBLIC' S PubidLiteral
4767 * 'PUBLIC' S PubidLiteral S SystemLiteral
4768 * and 'SYSTEM' S SystemLiteral
4769 *
4770 * See the NOTE on xmlParseExternalID().
4771 */
4772
4773void
4774xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004775 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004776 xmlChar *Pubid;
4777 xmlChar *Systemid;
4778
Daniel Veillarda07050d2003-10-19 14:46:32 +00004779 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004780 xmlParserInputPtr input = ctxt->input;
4781 SHRINK;
4782 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004783 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004786 return;
4787 }
4788 SKIP_BLANKS;
4789
Daniel Veillard76d66f42001-05-16 21:05:17 +00004790 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004791 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004792 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 return;
4794 }
William M. Brack76e95df2003-10-18 16:20:14 +00004795 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004796 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004797 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004798 return;
4799 }
Daniel Veillard37334572008-07-31 08:20:02 +00004800 if (xmlStrchr(name, ':') != NULL) {
4801 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4802 "colon are forbidden from notation names '%s'\n",
4803 name, NULL, NULL);
4804 }
Owen Taylor3473f882001-02-23 17:55:21 +00004805 SKIP_BLANKS;
4806
4807 /*
4808 * Parse the IDs.
4809 */
4810 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4811 SKIP_BLANKS;
4812
4813 if (RAW == '>') {
4814 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004815 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4816 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004817 }
4818 NEXT;
4819 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4820 (ctxt->sax->notationDecl != NULL))
4821 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4822 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004823 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004824 }
Owen Taylor3473f882001-02-23 17:55:21 +00004825 if (Systemid != NULL) xmlFree(Systemid);
4826 if (Pubid != NULL) xmlFree(Pubid);
4827 }
4828}
4829
4830/**
4831 * xmlParseEntityDecl:
4832 * @ctxt: an XML parser context
4833 *
4834 * parse <!ENTITY declarations
4835 *
4836 * [70] EntityDecl ::= GEDecl | PEDecl
4837 *
4838 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4839 *
4840 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4841 *
4842 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4843 *
4844 * [74] PEDef ::= EntityValue | ExternalID
4845 *
4846 * [76] NDataDecl ::= S 'NDATA' S Name
4847 *
4848 * [ VC: Notation Declared ]
4849 * The Name must match the declared name of a notation.
4850 */
4851
4852void
4853xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004854 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004855 xmlChar *value = NULL;
4856 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004857 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004858 int isParameter = 0;
4859 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004860 int skipped;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00004861 unsigned long oldnbent = ctxt->nbentities;
Owen Taylor3473f882001-02-23 17:55:21 +00004862
Daniel Veillard4c778d82005-01-23 17:37:44 +00004863 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004864 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004865 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004866 SHRINK;
4867 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004868 skipped = SKIP_BLANKS;
4869 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004870 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4871 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004872 }
Owen Taylor3473f882001-02-23 17:55:21 +00004873
4874 if (RAW == '%') {
4875 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004876 skipped = SKIP_BLANKS;
4877 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004878 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4879 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004880 }
Owen Taylor3473f882001-02-23 17:55:21 +00004881 isParameter = 1;
4882 }
4883
Daniel Veillard76d66f42001-05-16 21:05:17 +00004884 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004885 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004886 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4887 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004888 return;
4889 }
Daniel Veillard37334572008-07-31 08:20:02 +00004890 if (xmlStrchr(name, ':') != NULL) {
4891 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4892 "colon are forbidden from entities names '%s'\n",
4893 name, NULL, NULL);
4894 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004895 skipped = SKIP_BLANKS;
4896 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004897 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4898 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004899 }
Owen Taylor3473f882001-02-23 17:55:21 +00004900
Daniel Veillardf5582f12002-06-11 10:08:16 +00004901 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004902 /*
4903 * handle the various case of definitions...
4904 */
4905 if (isParameter) {
4906 if ((RAW == '"') || (RAW == '\'')) {
4907 value = xmlParseEntityValue(ctxt, &orig);
4908 if (value) {
4909 if ((ctxt->sax != NULL) &&
4910 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4911 ctxt->sax->entityDecl(ctxt->userData, name,
4912 XML_INTERNAL_PARAMETER_ENTITY,
4913 NULL, NULL, value);
4914 }
4915 } else {
4916 URI = xmlParseExternalID(ctxt, &literal, 1);
4917 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004918 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 }
4920 if (URI) {
4921 xmlURIPtr uri;
4922
4923 uri = xmlParseURI((const char *) URI);
4924 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004925 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4926 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004927 /*
4928 * This really ought to be a well formedness error
4929 * but the XML Core WG decided otherwise c.f. issue
4930 * E26 of the XML erratas.
4931 */
Owen Taylor3473f882001-02-23 17:55:21 +00004932 } else {
4933 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004934 /*
4935 * Okay this is foolish to block those but not
4936 * invalid URIs.
4937 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004938 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004939 } else {
4940 if ((ctxt->sax != NULL) &&
4941 (!ctxt->disableSAX) &&
4942 (ctxt->sax->entityDecl != NULL))
4943 ctxt->sax->entityDecl(ctxt->userData, name,
4944 XML_EXTERNAL_PARAMETER_ENTITY,
4945 literal, URI, NULL);
4946 }
4947 xmlFreeURI(uri);
4948 }
4949 }
4950 }
4951 } else {
4952 if ((RAW == '"') || (RAW == '\'')) {
4953 value = xmlParseEntityValue(ctxt, &orig);
4954 if ((ctxt->sax != NULL) &&
4955 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4956 ctxt->sax->entityDecl(ctxt->userData, name,
4957 XML_INTERNAL_GENERAL_ENTITY,
4958 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004959 /*
4960 * For expat compatibility in SAX mode.
4961 */
4962 if ((ctxt->myDoc == NULL) ||
4963 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4964 if (ctxt->myDoc == NULL) {
4965 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004966 if (ctxt->myDoc == NULL) {
4967 xmlErrMemory(ctxt, "New Doc failed");
4968 return;
4969 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00004970 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00004971 }
4972 if (ctxt->myDoc->intSubset == NULL)
4973 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4974 BAD_CAST "fake", NULL, NULL);
4975
Daniel Veillard1af9a412003-08-20 22:54:39 +00004976 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4977 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004978 }
Owen Taylor3473f882001-02-23 17:55:21 +00004979 } else {
4980 URI = xmlParseExternalID(ctxt, &literal, 1);
4981 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004982 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004983 }
4984 if (URI) {
4985 xmlURIPtr uri;
4986
4987 uri = xmlParseURI((const char *)URI);
4988 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004989 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4990 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004991 /*
4992 * This really ought to be a well formedness error
4993 * but the XML Core WG decided otherwise c.f. issue
4994 * E26 of the XML erratas.
4995 */
Owen Taylor3473f882001-02-23 17:55:21 +00004996 } else {
4997 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004998 /*
4999 * Okay this is foolish to block those but not
5000 * invalid URIs.
5001 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005002 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005003 }
5004 xmlFreeURI(uri);
5005 }
5006 }
William M. Brack76e95df2003-10-18 16:20:14 +00005007 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5009 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
5011 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005012 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005013 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005014 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005015 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5016 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005017 }
5018 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005019 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005020 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5021 (ctxt->sax->unparsedEntityDecl != NULL))
5022 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5023 literal, URI, ndata);
5024 } else {
5025 if ((ctxt->sax != NULL) &&
5026 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5027 ctxt->sax->entityDecl(ctxt->userData, name,
5028 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5029 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005030 /*
5031 * For expat compatibility in SAX mode.
5032 * assuming the entity repalcement was asked for
5033 */
5034 if ((ctxt->replaceEntities != 0) &&
5035 ((ctxt->myDoc == NULL) ||
5036 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5037 if (ctxt->myDoc == NULL) {
5038 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005039 if (ctxt->myDoc == NULL) {
5040 xmlErrMemory(ctxt, "New Doc failed");
5041 return;
5042 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005043 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005044 }
5045
5046 if (ctxt->myDoc->intSubset == NULL)
5047 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5048 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005049 xmlSAX2EntityDecl(ctxt, name,
5050 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5051 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005052 }
Owen Taylor3473f882001-02-23 17:55:21 +00005053 }
5054 }
5055 }
5056 SKIP_BLANKS;
5057 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005058 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005059 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005060 } else {
5061 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005062 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5063 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005064 }
5065 NEXT;
5066 }
5067 if (orig != NULL) {
5068 /*
5069 * Ugly mechanism to save the raw entity value.
5070 */
5071 xmlEntityPtr cur = NULL;
5072
5073 if (isParameter) {
5074 if ((ctxt->sax != NULL) &&
5075 (ctxt->sax->getParameterEntity != NULL))
5076 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5077 } else {
5078 if ((ctxt->sax != NULL) &&
5079 (ctxt->sax->getEntity != NULL))
5080 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005081 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005082 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005083 }
Owen Taylor3473f882001-02-23 17:55:21 +00005084 }
5085 if (cur != NULL) {
Daniel Veillardf4f4e482008-08-25 08:57:48 +00005086 cur->checked = ctxt->nbentities - oldnbent;
Owen Taylor3473f882001-02-23 17:55:21 +00005087 if (cur->orig != NULL)
5088 xmlFree(orig);
5089 else
5090 cur->orig = orig;
5091 } else
5092 xmlFree(orig);
5093 }
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if (value != NULL) xmlFree(value);
5095 if (URI != NULL) xmlFree(URI);
5096 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 }
5098}
5099
5100/**
5101 * xmlParseDefaultDecl:
5102 * @ctxt: an XML parser context
5103 * @value: Receive a possible fixed default value for the attribute
5104 *
5105 * Parse an attribute default declaration
5106 *
5107 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5108 *
5109 * [ VC: Required Attribute ]
5110 * if the default declaration is the keyword #REQUIRED, then the
5111 * attribute must be specified for all elements of the type in the
5112 * attribute-list declaration.
5113 *
5114 * [ VC: Attribute Default Legal ]
5115 * The declared default value must meet the lexical constraints of
5116 * the declared attribute type c.f. xmlValidateAttributeDecl()
5117 *
5118 * [ VC: Fixed Attribute Default ]
5119 * if an attribute has a default value declared with the #FIXED
5120 * keyword, instances of that attribute must match the default value.
5121 *
5122 * [ WFC: No < in Attribute Values ]
5123 * handled in xmlParseAttValue()
5124 *
5125 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5126 * or XML_ATTRIBUTE_FIXED.
5127 */
5128
5129int
5130xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5131 int val;
5132 xmlChar *ret;
5133
5134 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005135 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005136 SKIP(9);
5137 return(XML_ATTRIBUTE_REQUIRED);
5138 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005139 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005140 SKIP(8);
5141 return(XML_ATTRIBUTE_IMPLIED);
5142 }
5143 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005144 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005145 SKIP(6);
5146 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005147 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005148 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005150 }
5151 SKIP_BLANKS;
5152 }
5153 ret = xmlParseAttValue(ctxt);
5154 ctxt->instate = XML_PARSER_DTD;
5155 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005156 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005157 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005158 } else
5159 *value = ret;
5160 return(val);
5161}
5162
5163/**
5164 * xmlParseNotationType:
5165 * @ctxt: an XML parser context
5166 *
5167 * parse an Notation attribute type.
5168 *
5169 * Note: the leading 'NOTATION' S part has already being parsed...
5170 *
5171 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5172 *
5173 * [ VC: Notation Attributes ]
5174 * Values of this type must match one of the notation names included
5175 * in the declaration; all notation names in the declaration must be declared.
5176 *
5177 * Returns: the notation attribute tree built while parsing
5178 */
5179
5180xmlEnumerationPtr
5181xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005182 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005183 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5184
5185 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005186 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005187 return(NULL);
5188 }
5189 SHRINK;
5190 do {
5191 NEXT;
5192 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005193 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005194 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005195 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5196 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005197 return(ret);
5198 }
5199 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 if (cur == NULL) return(ret);
5201 if (last == NULL) ret = last = cur;
5202 else {
5203 last->next = cur;
5204 last = cur;
5205 }
5206 SKIP_BLANKS;
5207 } while (RAW == '|');
5208 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005209 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 if ((last != NULL) && (last != ret))
5211 xmlFreeEnumeration(last);
5212 return(ret);
5213 }
5214 NEXT;
5215 return(ret);
5216}
5217
5218/**
5219 * xmlParseEnumerationType:
5220 * @ctxt: an XML parser context
5221 *
5222 * parse an Enumeration attribute type.
5223 *
5224 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5225 *
5226 * [ VC: Enumeration ]
5227 * Values of this type must match one of the Nmtoken tokens in
5228 * the declaration
5229 *
5230 * Returns: the enumeration attribute tree built while parsing
5231 */
5232
5233xmlEnumerationPtr
5234xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5235 xmlChar *name;
5236 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5237
5238 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005239 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 return(NULL);
5241 }
5242 SHRINK;
5243 do {
5244 NEXT;
5245 SKIP_BLANKS;
5246 name = xmlParseNmtoken(ctxt);
5247 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005248 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005249 return(ret);
5250 }
5251 cur = xmlCreateEnumeration(name);
5252 xmlFree(name);
5253 if (cur == NULL) return(ret);
5254 if (last == NULL) ret = last = cur;
5255 else {
5256 last->next = cur;
5257 last = cur;
5258 }
5259 SKIP_BLANKS;
5260 } while (RAW == '|');
5261 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005262 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005263 return(ret);
5264 }
5265 NEXT;
5266 return(ret);
5267}
5268
5269/**
5270 * xmlParseEnumeratedType:
5271 * @ctxt: an XML parser context
5272 * @tree: the enumeration tree built while parsing
5273 *
5274 * parse an Enumerated attribute type.
5275 *
5276 * [57] EnumeratedType ::= NotationType | Enumeration
5277 *
5278 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5279 *
5280 *
5281 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5282 */
5283
5284int
5285xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005286 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005287 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005288 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005289 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005291 return(0);
5292 }
5293 SKIP_BLANKS;
5294 *tree = xmlParseNotationType(ctxt);
5295 if (*tree == NULL) return(0);
5296 return(XML_ATTRIBUTE_NOTATION);
5297 }
5298 *tree = xmlParseEnumerationType(ctxt);
5299 if (*tree == NULL) return(0);
5300 return(XML_ATTRIBUTE_ENUMERATION);
5301}
5302
5303/**
5304 * xmlParseAttributeType:
5305 * @ctxt: an XML parser context
5306 * @tree: the enumeration tree built while parsing
5307 *
5308 * parse the Attribute list def for an element
5309 *
5310 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5311 *
5312 * [55] StringType ::= 'CDATA'
5313 *
5314 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5315 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5316 *
5317 * Validity constraints for attribute values syntax are checked in
5318 * xmlValidateAttributeValue()
5319 *
5320 * [ VC: ID ]
5321 * Values of type ID must match the Name production. A name must not
5322 * appear more than once in an XML document as a value of this type;
5323 * i.e., ID values must uniquely identify the elements which bear them.
5324 *
5325 * [ VC: One ID per Element Type ]
5326 * No element type may have more than one ID attribute specified.
5327 *
5328 * [ VC: ID Attribute Default ]
5329 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5330 *
5331 * [ VC: IDREF ]
5332 * Values of type IDREF must match the Name production, and values
5333 * of type IDREFS must match Names; each IDREF Name must match the value
5334 * of an ID attribute on some element in the XML document; i.e. IDREF
5335 * values must match the value of some ID attribute.
5336 *
5337 * [ VC: Entity Name ]
5338 * Values of type ENTITY must match the Name production, values
5339 * of type ENTITIES must match Names; each Entity Name must match the
5340 * name of an unparsed entity declared in the DTD.
5341 *
5342 * [ VC: Name Token ]
5343 * Values of type NMTOKEN must match the Nmtoken production; values
5344 * of type NMTOKENS must match Nmtokens.
5345 *
5346 * Returns the attribute type
5347 */
5348int
5349xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5350 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005351 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005352 SKIP(5);
5353 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005354 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005355 SKIP(6);
5356 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005357 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005358 SKIP(5);
5359 return(XML_ATTRIBUTE_IDREF);
5360 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5361 SKIP(2);
5362 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005363 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005364 SKIP(6);
5365 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005366 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005367 SKIP(8);
5368 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005369 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005370 SKIP(8);
5371 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005372 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005373 SKIP(7);
5374 return(XML_ATTRIBUTE_NMTOKEN);
5375 }
5376 return(xmlParseEnumeratedType(ctxt, tree));
5377}
5378
5379/**
5380 * xmlParseAttributeListDecl:
5381 * @ctxt: an XML parser context
5382 *
5383 * : parse the Attribute list def for an element
5384 *
5385 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5386 *
5387 * [53] AttDef ::= S Name S AttType S DefaultDecl
5388 *
5389 */
5390void
5391xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005392 const xmlChar *elemName;
5393 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005394 xmlEnumerationPtr tree;
5395
Daniel Veillarda07050d2003-10-19 14:46:32 +00005396 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005397 xmlParserInputPtr input = ctxt->input;
5398
5399 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005400 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005401 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005402 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005403 }
5404 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005405 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005407 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5408 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005409 return;
5410 }
5411 SKIP_BLANKS;
5412 GROW;
5413 while (RAW != '>') {
5414 const xmlChar *check = CUR_PTR;
5415 int type;
5416 int def;
5417 xmlChar *defaultValue = NULL;
5418
5419 GROW;
5420 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005421 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005422 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005423 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5424 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005425 break;
5426 }
5427 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005428 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005429 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005430 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005431 break;
5432 }
5433 SKIP_BLANKS;
5434
5435 type = xmlParseAttributeType(ctxt, &tree);
5436 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005437 break;
5438 }
5439
5440 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005441 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5443 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005444 if (tree != NULL)
5445 xmlFreeEnumeration(tree);
5446 break;
5447 }
5448 SKIP_BLANKS;
5449
5450 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5451 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005452 if (defaultValue != NULL)
5453 xmlFree(defaultValue);
5454 if (tree != NULL)
5455 xmlFreeEnumeration(tree);
5456 break;
5457 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005458 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5459 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005460
5461 GROW;
5462 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005463 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005465 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005466 if (defaultValue != NULL)
5467 xmlFree(defaultValue);
5468 if (tree != NULL)
5469 xmlFreeEnumeration(tree);
5470 break;
5471 }
5472 SKIP_BLANKS;
5473 }
5474 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005475 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5476 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005477 if (defaultValue != NULL)
5478 xmlFree(defaultValue);
5479 if (tree != NULL)
5480 xmlFreeEnumeration(tree);
5481 break;
5482 }
5483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5484 (ctxt->sax->attributeDecl != NULL))
5485 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5486 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005487 else if (tree != NULL)
5488 xmlFreeEnumeration(tree);
5489
5490 if ((ctxt->sax2) && (defaultValue != NULL) &&
5491 (def != XML_ATTRIBUTE_IMPLIED) &&
5492 (def != XML_ATTRIBUTE_REQUIRED)) {
5493 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5494 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005495 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005496 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5497 }
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (defaultValue != NULL)
5499 xmlFree(defaultValue);
5500 GROW;
5501 }
5502 if (RAW == '>') {
5503 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005504 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5505 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005506 }
5507 NEXT;
5508 }
Owen Taylor3473f882001-02-23 17:55:21 +00005509 }
5510}
5511
5512/**
5513 * xmlParseElementMixedContentDecl:
5514 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005515 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005516 *
5517 * parse the declaration for a Mixed Element content
5518 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5519 *
5520 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5521 * '(' S? '#PCDATA' S? ')'
5522 *
5523 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5524 *
5525 * [ VC: No Duplicate Types ]
5526 * The same name must not appear more than once in a single
5527 * mixed-content declaration.
5528 *
5529 * returns: the list of the xmlElementContentPtr describing the element choices
5530 */
5531xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005532xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005533 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005534 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005535
5536 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005537 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005538 SKIP(7);
5539 SKIP_BLANKS;
5540 SHRINK;
5541 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005542 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005543 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5544"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005545 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005546 }
Owen Taylor3473f882001-02-23 17:55:21 +00005547 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005548 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005549 if (ret == NULL)
5550 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005551 if (RAW == '*') {
5552 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5553 NEXT;
5554 }
5555 return(ret);
5556 }
5557 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005558 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005559 if (ret == NULL) return(NULL);
5560 }
5561 while (RAW == '|') {
5562 NEXT;
5563 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005564 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005565 if (ret == NULL) return(NULL);
5566 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005567 if (cur != NULL)
5568 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005569 cur = ret;
5570 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005571 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005572 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005573 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005574 if (n->c1 != NULL)
5575 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005577 if (n != NULL)
5578 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005579 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005580 }
5581 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005582 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005583 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005584 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005585 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005586 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005587 return(NULL);
5588 }
5589 SKIP_BLANKS;
5590 GROW;
5591 }
5592 if ((RAW == ')') && (NXT(1) == '*')) {
5593 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005594 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005595 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005596 if (cur->c2 != NULL)
5597 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005598 }
5599 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005600 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005601 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5602"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005603 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005604 }
Owen Taylor3473f882001-02-23 17:55:21 +00005605 SKIP(2);
5606 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005607 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005608 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005609 return(NULL);
5610 }
5611
5612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005613 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005614 }
5615 return(ret);
5616}
5617
5618/**
5619 * xmlParseElementChildrenContentDecl:
5620 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005621 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005622 *
5623 * parse the declaration for a Mixed Element content
5624 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5625 *
5626 *
5627 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5628 *
5629 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5630 *
5631 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5632 *
5633 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5634 *
5635 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5636 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005637 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005638 * opening or closing parentheses in a choice, seq, or Mixed
5639 * construct is contained in the replacement text for a parameter
5640 * entity, both must be contained in the same replacement text. For
5641 * interoperability, if a parameter-entity reference appears in a
5642 * choice, seq, or Mixed construct, its replacement text should not
5643 * be empty, and neither the first nor last non-blank character of
5644 * the replacement text should be a connector (| or ,).
5645 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005646 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005647 * hierarchy.
5648 */
5649xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005650xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005651 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005652 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005653 xmlChar type = 0;
5654
5655 SKIP_BLANKS;
5656 GROW;
5657 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005658 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005659
Owen Taylor3473f882001-02-23 17:55:21 +00005660 /* Recurse on first child */
5661 NEXT;
5662 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005663 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005664 SKIP_BLANKS;
5665 GROW;
5666 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005667 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005669 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005670 return(NULL);
5671 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005672 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005673 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005674 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005675 return(NULL);
5676 }
Owen Taylor3473f882001-02-23 17:55:21 +00005677 GROW;
5678 if (RAW == '?') {
5679 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5680 NEXT;
5681 } else if (RAW == '*') {
5682 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5683 NEXT;
5684 } else if (RAW == '+') {
5685 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5686 NEXT;
5687 } else {
5688 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5689 }
Owen Taylor3473f882001-02-23 17:55:21 +00005690 GROW;
5691 }
5692 SKIP_BLANKS;
5693 SHRINK;
5694 while (RAW != ')') {
5695 /*
5696 * Each loop we parse one separator and one element.
5697 */
5698 if (RAW == ',') {
5699 if (type == 0) type = CUR;
5700
5701 /*
5702 * Detect "Name | Name , Name" error
5703 */
5704 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005705 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005706 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005707 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005708 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005709 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005710 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005711 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005712 return(NULL);
5713 }
5714 NEXT;
5715
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005716 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005717 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005718 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005719 xmlFreeDocElementContent(ctxt->myDoc, last);
5720 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005721 return(NULL);
5722 }
5723 if (last == NULL) {
5724 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005725 if (ret != NULL)
5726 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005727 ret = cur = op;
5728 } else {
5729 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005730 if (op != NULL)
5731 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005732 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005733 if (last != NULL)
5734 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005735 cur =op;
5736 last = NULL;
5737 }
5738 } else if (RAW == '|') {
5739 if (type == 0) type = CUR;
5740
5741 /*
5742 * Detect "Name , Name | Name" error
5743 */
5744 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005745 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005746 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005747 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005748 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005749 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005751 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005752 return(NULL);
5753 }
5754 NEXT;
5755
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005756 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005757 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005758 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005759 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005761 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005762 return(NULL);
5763 }
5764 if (last == NULL) {
5765 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005766 if (ret != NULL)
5767 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005768 ret = cur = op;
5769 } else {
5770 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005771 if (op != NULL)
5772 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005773 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005774 if (last != NULL)
5775 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005776 cur =op;
5777 last = NULL;
5778 }
5779 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005780 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005781 if ((last != NULL) && (last != ret))
5782 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005784 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005785 return(NULL);
5786 }
5787 GROW;
5788 SKIP_BLANKS;
5789 GROW;
5790 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005791 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005792 /* Recurse on second child */
5793 NEXT;
5794 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005795 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005796 SKIP_BLANKS;
5797 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005798 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005800 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005801 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005802 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005803 return(NULL);
5804 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005805 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005806 if (last == NULL) {
5807 if (ret != NULL)
5808 xmlFreeDocElementContent(ctxt->myDoc, ret);
5809 return(NULL);
5810 }
Owen Taylor3473f882001-02-23 17:55:21 +00005811 if (RAW == '?') {
5812 last->ocur = XML_ELEMENT_CONTENT_OPT;
5813 NEXT;
5814 } else if (RAW == '*') {
5815 last->ocur = XML_ELEMENT_CONTENT_MULT;
5816 NEXT;
5817 } else if (RAW == '+') {
5818 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5819 NEXT;
5820 } else {
5821 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5822 }
5823 }
5824 SKIP_BLANKS;
5825 GROW;
5826 }
5827 if ((cur != NULL) && (last != NULL)) {
5828 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005829 if (last != NULL)
5830 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005831 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005832 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005833 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5834"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005835 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005836 }
Owen Taylor3473f882001-02-23 17:55:21 +00005837 NEXT;
5838 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005839 if (ret != NULL) {
5840 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5841 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5842 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5843 else
5844 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5845 }
Owen Taylor3473f882001-02-23 17:55:21 +00005846 NEXT;
5847 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005848 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005849 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005850 cur = ret;
5851 /*
5852 * Some normalization:
5853 * (a | b* | c?)* == (a | b | c)*
5854 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005855 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005856 if ((cur->c1 != NULL) &&
5857 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5858 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5859 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5860 if ((cur->c2 != NULL) &&
5861 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5862 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5863 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5864 cur = cur->c2;
5865 }
5866 }
Owen Taylor3473f882001-02-23 17:55:21 +00005867 NEXT;
5868 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005869 if (ret != NULL) {
5870 int found = 0;
5871
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005872 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5873 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5874 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005875 else
5876 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005877 /*
5878 * Some normalization:
5879 * (a | b*)+ == (a | b)*
5880 * (a | b?)+ == (a | b)*
5881 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005882 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005883 if ((cur->c1 != NULL) &&
5884 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5885 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5886 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5887 found = 1;
5888 }
5889 if ((cur->c2 != NULL) &&
5890 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5891 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5892 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5893 found = 1;
5894 }
5895 cur = cur->c2;
5896 }
5897 if (found)
5898 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5899 }
Owen Taylor3473f882001-02-23 17:55:21 +00005900 NEXT;
5901 }
5902 return(ret);
5903}
5904
5905/**
5906 * xmlParseElementContentDecl:
5907 * @ctxt: an XML parser context
5908 * @name: the name of the element being defined.
5909 * @result: the Element Content pointer will be stored here if any
5910 *
5911 * parse the declaration for an Element content either Mixed or Children,
5912 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5913 *
5914 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5915 *
5916 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5917 */
5918
5919int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005920xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005921 xmlElementContentPtr *result) {
5922
5923 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005924 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005925 int res;
5926
5927 *result = NULL;
5928
5929 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005930 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005931 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 return(-1);
5933 }
5934 NEXT;
5935 GROW;
5936 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005937 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005938 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 res = XML_ELEMENT_TYPE_MIXED;
5940 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 res = XML_ELEMENT_TYPE_ELEMENT;
5943 }
Owen Taylor3473f882001-02-23 17:55:21 +00005944 SKIP_BLANKS;
5945 *result = tree;
5946 return(res);
5947}
5948
5949/**
5950 * xmlParseElementDecl:
5951 * @ctxt: an XML parser context
5952 *
5953 * parse an Element declaration.
5954 *
5955 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5956 *
5957 * [ VC: Unique Element Type Declaration ]
5958 * No element type may be declared more than once
5959 *
5960 * Returns the type of the element, or -1 in case of error
5961 */
5962int
5963xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005964 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005965 int ret = -1;
5966 xmlElementContentPtr content = NULL;
5967
Daniel Veillard4c778d82005-01-23 17:37:44 +00005968 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005969 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005970 xmlParserInputPtr input = ctxt->input;
5971
5972 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005973 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5975 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005976 }
5977 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005978 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005980 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5981 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005982 return(-1);
5983 }
5984 while ((RAW == 0) && (ctxt->inputNr > 1))
5985 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005986 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005987 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5988 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005989 }
5990 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005991 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005992 SKIP(5);
5993 /*
5994 * Element must always be empty.
5995 */
5996 ret = XML_ELEMENT_TYPE_EMPTY;
5997 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5998 (NXT(2) == 'Y')) {
5999 SKIP(3);
6000 /*
6001 * Element is a generic container.
6002 */
6003 ret = XML_ELEMENT_TYPE_ANY;
6004 } else if (RAW == '(') {
6005 ret = xmlParseElementContentDecl(ctxt, name, &content);
6006 } else {
6007 /*
6008 * [ WFC: PEs in Internal Subset ] error handling.
6009 */
6010 if ((RAW == '%') && (ctxt->external == 0) &&
6011 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006012 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006013 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006014 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006015 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006016 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6017 }
Owen Taylor3473f882001-02-23 17:55:21 +00006018 return(-1);
6019 }
6020
6021 SKIP_BLANKS;
6022 /*
6023 * Pop-up of finished entities.
6024 */
6025 while ((RAW == 0) && (ctxt->inputNr > 1))
6026 xmlPopInput(ctxt);
6027 SKIP_BLANKS;
6028
6029 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006030 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006031 if (content != NULL) {
6032 xmlFreeDocElementContent(ctxt->myDoc, content);
6033 }
Owen Taylor3473f882001-02-23 17:55:21 +00006034 } else {
6035 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006036 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6037 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006038 }
6039
6040 NEXT;
6041 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006042 (ctxt->sax->elementDecl != NULL)) {
6043 if (content != NULL)
6044 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006045 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6046 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006047 if ((content != NULL) && (content->parent == NULL)) {
6048 /*
6049 * this is a trick: if xmlAddElementDecl is called,
6050 * instead of copying the full tree it is plugged directly
6051 * if called from the parser. Avoid duplicating the
6052 * interfaces or change the API/ABI
6053 */
6054 xmlFreeDocElementContent(ctxt->myDoc, content);
6055 }
6056 } else if (content != NULL) {
6057 xmlFreeDocElementContent(ctxt->myDoc, content);
6058 }
Owen Taylor3473f882001-02-23 17:55:21 +00006059 }
Owen Taylor3473f882001-02-23 17:55:21 +00006060 }
6061 return(ret);
6062}
6063
6064/**
Owen Taylor3473f882001-02-23 17:55:21 +00006065 * xmlParseConditionalSections
6066 * @ctxt: an XML parser context
6067 *
6068 * [61] conditionalSect ::= includeSect | ignoreSect
6069 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6070 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6071 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6072 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6073 */
6074
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006075static void
Owen Taylor3473f882001-02-23 17:55:21 +00006076xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6077 SKIP(3);
6078 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006079 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006080 SKIP(7);
6081 SKIP_BLANKS;
6082 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006083 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006084 } else {
6085 NEXT;
6086 }
6087 if (xmlParserDebugEntities) {
6088 if ((ctxt->input != NULL) && (ctxt->input->filename))
6089 xmlGenericError(xmlGenericErrorContext,
6090 "%s(%d): ", ctxt->input->filename,
6091 ctxt->input->line);
6092 xmlGenericError(xmlGenericErrorContext,
6093 "Entering INCLUDE Conditional Section\n");
6094 }
6095
6096 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6097 (NXT(2) != '>'))) {
6098 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006099 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006100
6101 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6102 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006103 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006104 NEXT;
6105 } else if (RAW == '%') {
6106 xmlParsePEReference(ctxt);
6107 } else
6108 xmlParseMarkupDecl(ctxt);
6109
6110 /*
6111 * Pop-up of finished entities.
6112 */
6113 while ((RAW == 0) && (ctxt->inputNr > 1))
6114 xmlPopInput(ctxt);
6115
Daniel Veillardfdc91562002-07-01 21:52:03 +00006116 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006117 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 break;
6119 }
6120 }
6121 if (xmlParserDebugEntities) {
6122 if ((ctxt->input != NULL) && (ctxt->input->filename))
6123 xmlGenericError(xmlGenericErrorContext,
6124 "%s(%d): ", ctxt->input->filename,
6125 ctxt->input->line);
6126 xmlGenericError(xmlGenericErrorContext,
6127 "Leaving INCLUDE Conditional Section\n");
6128 }
6129
Daniel Veillarda07050d2003-10-19 14:46:32 +00006130 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006131 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006132 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006133 int depth = 0;
6134
6135 SKIP(6);
6136 SKIP_BLANKS;
6137 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006138 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006139 } else {
6140 NEXT;
6141 }
6142 if (xmlParserDebugEntities) {
6143 if ((ctxt->input != NULL) && (ctxt->input->filename))
6144 xmlGenericError(xmlGenericErrorContext,
6145 "%s(%d): ", ctxt->input->filename,
6146 ctxt->input->line);
6147 xmlGenericError(xmlGenericErrorContext,
6148 "Entering IGNORE Conditional Section\n");
6149 }
6150
6151 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006152 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006153 * But disable SAX event generating DTD building in the meantime
6154 */
6155 state = ctxt->disableSAX;
6156 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 ctxt->instate = XML_PARSER_IGNORE;
6159
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006160 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006161 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6162 depth++;
6163 SKIP(3);
6164 continue;
6165 }
6166 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6167 if (--depth >= 0) SKIP(3);
6168 continue;
6169 }
6170 NEXT;
6171 continue;
6172 }
6173
6174 ctxt->disableSAX = state;
6175 ctxt->instate = instate;
6176
6177 if (xmlParserDebugEntities) {
6178 if ((ctxt->input != NULL) && (ctxt->input->filename))
6179 xmlGenericError(xmlGenericErrorContext,
6180 "%s(%d): ", ctxt->input->filename,
6181 ctxt->input->line);
6182 xmlGenericError(xmlGenericErrorContext,
6183 "Leaving IGNORE Conditional Section\n");
6184 }
6185
6186 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006187 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006188 }
6189
6190 if (RAW == 0)
6191 SHRINK;
6192
6193 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006194 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006195 } else {
6196 SKIP(3);
6197 }
6198}
6199
6200/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006201 * xmlParseMarkupDecl:
6202 * @ctxt: an XML parser context
6203 *
6204 * parse Markup declarations
6205 *
6206 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6207 * NotationDecl | PI | Comment
6208 *
6209 * [ VC: Proper Declaration/PE Nesting ]
6210 * Parameter-entity replacement text must be properly nested with
6211 * markup declarations. That is to say, if either the first character
6212 * or the last character of a markup declaration (markupdecl above) is
6213 * contained in the replacement text for a parameter-entity reference,
6214 * both must be contained in the same replacement text.
6215 *
6216 * [ WFC: PEs in Internal Subset ]
6217 * In the internal DTD subset, parameter-entity references can occur
6218 * only where markup declarations can occur, not within markup declarations.
6219 * (This does not apply to references that occur in external parameter
6220 * entities or to the external subset.)
6221 */
6222void
6223xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6224 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006225 if (CUR == '<') {
6226 if (NXT(1) == '!') {
6227 switch (NXT(2)) {
6228 case 'E':
6229 if (NXT(3) == 'L')
6230 xmlParseElementDecl(ctxt);
6231 else if (NXT(3) == 'N')
6232 xmlParseEntityDecl(ctxt);
6233 break;
6234 case 'A':
6235 xmlParseAttributeListDecl(ctxt);
6236 break;
6237 case 'N':
6238 xmlParseNotationDecl(ctxt);
6239 break;
6240 case '-':
6241 xmlParseComment(ctxt);
6242 break;
6243 default:
6244 /* there is an error but it will be detected later */
6245 break;
6246 }
6247 } else if (NXT(1) == '?') {
6248 xmlParsePI(ctxt);
6249 }
6250 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006251 /*
6252 * This is only for internal subset. On external entities,
6253 * the replacement is done before parsing stage
6254 */
6255 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6256 xmlParsePEReference(ctxt);
6257
6258 /*
6259 * Conditional sections are allowed from entities included
6260 * by PE References in the internal subset.
6261 */
6262 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6263 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6264 xmlParseConditionalSections(ctxt);
6265 }
6266 }
6267
6268 ctxt->instate = XML_PARSER_DTD;
6269}
6270
6271/**
6272 * xmlParseTextDecl:
6273 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006274 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006275 * parse an XML declaration header for external entities
6276 *
6277 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006278 */
6279
6280void
6281xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6282 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006283 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006284
6285 /*
6286 * We know that '<?xml' is here.
6287 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006288 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006289 SKIP(5);
6290 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006291 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006292 return;
6293 }
6294
William M. Brack76e95df2003-10-18 16:20:14 +00006295 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006296 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6297 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006298 }
6299 SKIP_BLANKS;
6300
6301 /*
6302 * We may have the VersionInfo here.
6303 */
6304 version = xmlParseVersionInfo(ctxt);
6305 if (version == NULL)
6306 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006307 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006308 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6310 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006311 }
6312 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006313 ctxt->input->version = version;
6314
6315 /*
6316 * We must have the encoding declaration
6317 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006318 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006319 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6320 /*
6321 * The XML REC instructs us to stop parsing right here
6322 */
6323 return;
6324 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006325 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6326 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6327 "Missing encoding in text declaration\n");
6328 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006329
6330 SKIP_BLANKS;
6331 if ((RAW == '?') && (NXT(1) == '>')) {
6332 SKIP(2);
6333 } else if (RAW == '>') {
6334 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006335 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006336 NEXT;
6337 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006338 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006339 MOVETO_ENDTAG(CUR_PTR);
6340 NEXT;
6341 }
6342}
6343
6344/**
Owen Taylor3473f882001-02-23 17:55:21 +00006345 * xmlParseExternalSubset:
6346 * @ctxt: an XML parser context
6347 * @ExternalID: the external identifier
6348 * @SystemID: the system identifier (or URL)
6349 *
6350 * parse Markup declarations from an external subset
6351 *
6352 * [30] extSubset ::= textDecl? extSubsetDecl
6353 *
6354 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6355 */
6356void
6357xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6358 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006359 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006360 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006361
6362 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6363 (ctxt->input->end - ctxt->input->cur >= 4)) {
6364 xmlChar start[4];
6365 xmlCharEncoding enc;
6366
6367 start[0] = RAW;
6368 start[1] = NXT(1);
6369 start[2] = NXT(2);
6370 start[3] = NXT(3);
6371 enc = xmlDetectCharEncoding(start, 4);
6372 if (enc != XML_CHAR_ENCODING_NONE)
6373 xmlSwitchEncoding(ctxt, enc);
6374 }
6375
Daniel Veillarda07050d2003-10-19 14:46:32 +00006376 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006377 xmlParseTextDecl(ctxt);
6378 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6379 /*
6380 * The XML REC instructs us to stop parsing right here
6381 */
6382 ctxt->instate = XML_PARSER_EOF;
6383 return;
6384 }
6385 }
6386 if (ctxt->myDoc == NULL) {
6387 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006388 if (ctxt->myDoc == NULL) {
6389 xmlErrMemory(ctxt, "New Doc failed");
6390 return;
6391 }
6392 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6395 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6396
6397 ctxt->instate = XML_PARSER_DTD;
6398 ctxt->external = 1;
6399 while (((RAW == '<') && (NXT(1) == '?')) ||
6400 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006401 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006402 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006403 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006404
6405 GROW;
6406 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6407 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006408 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006409 NEXT;
6410 } else if (RAW == '%') {
6411 xmlParsePEReference(ctxt);
6412 } else
6413 xmlParseMarkupDecl(ctxt);
6414
6415 /*
6416 * Pop-up of finished entities.
6417 */
6418 while ((RAW == 0) && (ctxt->inputNr > 1))
6419 xmlPopInput(ctxt);
6420
Daniel Veillardfdc91562002-07-01 21:52:03 +00006421 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006422 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006423 break;
6424 }
6425 }
6426
6427 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006428 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006429 }
6430
6431}
6432
6433/**
6434 * xmlParseReference:
6435 * @ctxt: an XML parser context
6436 *
6437 * parse and handle entity references in content, depending on the SAX
6438 * interface, this may end-up in a call to character() if this is a
6439 * CharRef, a predefined entity, if there is no reference() callback.
6440 * or if the parser was asked to switch to that mode.
6441 *
6442 * [67] Reference ::= EntityRef | CharRef
6443 */
6444void
6445xmlParseReference(xmlParserCtxtPtr ctxt) {
6446 xmlEntityPtr ent;
6447 xmlChar *val;
6448 if (RAW != '&') return;
6449
6450 if (NXT(1) == '#') {
6451 int i = 0;
6452 xmlChar out[10];
6453 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006454 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006455
Daniel Veillarddc171602008-03-26 17:41:38 +00006456 if (value == 0)
6457 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006458 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6459 /*
6460 * So we are using non-UTF-8 buffers
6461 * Check that the char fit on 8bits, if not
6462 * generate a CharRef.
6463 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006464 if (value <= 0xFF) {
6465 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006466 out[1] = 0;
6467 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6468 (!ctxt->disableSAX))
6469 ctxt->sax->characters(ctxt->userData, out, 1);
6470 } else {
6471 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006472 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006473 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006474 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006475 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6476 (!ctxt->disableSAX))
6477 ctxt->sax->reference(ctxt->userData, out);
6478 }
6479 } else {
6480 /*
6481 * Just encode the value in UTF-8
6482 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006483 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006484 out[i] = 0;
6485 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6486 (!ctxt->disableSAX))
6487 ctxt->sax->characters(ctxt->userData, out, i);
6488 }
6489 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006490 int was_checked;
6491
Owen Taylor3473f882001-02-23 17:55:21 +00006492 ent = xmlParseEntityRef(ctxt);
6493 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006494 if (!ctxt->wellFormed)
6495 return;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006496 ctxt->nbentities++;
6497 if (ctxt->nbentities >= 500000) {
6498 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6499 return;
6500 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006501 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006502 if ((ent->name != NULL) &&
6503 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6504 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006505 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006506
6507
6508 /*
6509 * The first reference to the entity trigger a parsing phase
6510 * where the ent->children is filled with the result from
6511 * the parsing.
6512 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006513 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006514 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006515
Owen Taylor3473f882001-02-23 17:55:21 +00006516 value = ent->content;
6517
6518 /*
6519 * Check that this entity is well formed
6520 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006521 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006522 (value[1] == 0) && (value[0] == '<') &&
6523 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6524 /*
6525 * DONE: get definite answer on this !!!
6526 * Lots of entity decls are used to declare a single
6527 * char
6528 * <!ENTITY lt "<">
6529 * Which seems to be valid since
6530 * 2.4: The ampersand character (&) and the left angle
6531 * bracket (<) may appear in their literal form only
6532 * when used ... They are also legal within the literal
6533 * entity value of an internal entity declaration;i
6534 * see "4.3.2 Well-Formed Parsed Entities".
6535 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6536 * Looking at the OASIS test suite and James Clark
6537 * tests, this is broken. However the XML REC uses
6538 * it. Is the XML REC not well-formed ????
6539 * This is a hack to avoid this problem
6540 *
6541 * ANSWER: since lt gt amp .. are already defined,
6542 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006543 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006544 * is lousy but acceptable.
6545 */
6546 list = xmlNewDocText(ctxt->myDoc, value);
6547 if (list != NULL) {
6548 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6549 (ent->children == NULL)) {
6550 ent->children = list;
6551 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006552 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006553 list->parent = (xmlNodePtr) ent;
6554 } else {
6555 xmlFreeNodeList(list);
6556 }
6557 } else if (list != NULL) {
6558 xmlFreeNodeList(list);
6559 }
6560 } else {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006561 unsigned long oldnbent = ctxt->nbentities;
Owen Taylor3473f882001-02-23 17:55:21 +00006562 /*
6563 * 4.3.2: An internal general parsed entity is well-formed
6564 * if its replacement text matches the production labeled
6565 * content.
6566 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006567
6568 void *user_data;
6569 /*
6570 * This is a bit hackish but this seems the best
6571 * way to make sure both SAX and DOM entity support
6572 * behaves okay.
6573 */
6574 if (ctxt->userData == ctxt)
6575 user_data = NULL;
6576 else
6577 user_data = ctxt->userData;
6578
Owen Taylor3473f882001-02-23 17:55:21 +00006579 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6580 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006581 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6582 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006583 ctxt->depth--;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006584
Owen Taylor3473f882001-02-23 17:55:21 +00006585 } else if (ent->etype ==
6586 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6587 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006588 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006589 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006590 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006591 ctxt->depth--;
6592 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006593 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006594 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6595 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006596 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006597 ent->checked = ctxt->nbentities - oldnbent;
Owen Taylor3473f882001-02-23 17:55:21 +00006598 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006599 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006600 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006601 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006602 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6603 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006604 (ent->children == NULL)) {
6605 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006606 if (ctxt->replaceEntities) {
6607 /*
6608 * Prune it directly in the generated document
6609 * except for single text nodes.
6610 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006611 if (((list->type == XML_TEXT_NODE) &&
6612 (list->next == NULL)) ||
6613 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006614 list->parent = (xmlNodePtr) ent;
6615 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006616 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006617 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006618 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006619 while (list != NULL) {
6620 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006621 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006622 if (list->next == NULL)
6623 ent->last = list;
6624 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006625 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006626 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006627#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006628 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6629 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006630#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006631 }
6632 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006633 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006634 while (list != NULL) {
6635 list->parent = (xmlNodePtr) ent;
6636 if (list->next == NULL)
6637 ent->last = list;
6638 list = list->next;
6639 }
Owen Taylor3473f882001-02-23 17:55:21 +00006640 }
6641 } else {
6642 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006643 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006644 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006645 } else if ((ret != XML_ERR_OK) &&
6646 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006647 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6648 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006649 } else if (list != NULL) {
6650 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006651 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006652 }
6653 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006654 if (ent->checked == 0)
6655 ent->checked = 1;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006656 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006657 ctxt->nbentities += ent->checked;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006658
6659 if (ent->children == NULL) {
6660 /*
6661 * Probably running in SAX mode and the callbacks don't
6662 * build the entity content. So unless we already went
6663 * though parsing for first checking go though the entity
6664 * content to generate callbacks associated to the entity
6665 */
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006666 if (was_checked != 0) {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006667 void *user_data;
6668 /*
6669 * This is a bit hackish but this seems the best
6670 * way to make sure both SAX and DOM entity support
6671 * behaves okay.
6672 */
6673 if (ctxt->userData == ctxt)
6674 user_data = NULL;
6675 else
6676 user_data = ctxt->userData;
6677
6678 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6679 ctxt->depth++;
6680 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6681 ent->content, user_data, NULL);
6682 ctxt->depth--;
6683 } else if (ent->etype ==
6684 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6685 ctxt->depth++;
6686 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6687 ctxt->sax, user_data, ctxt->depth,
6688 ent->URI, ent->ExternalID, NULL);
6689 ctxt->depth--;
6690 } else {
6691 ret = XML_ERR_ENTITY_PE_INTERNAL;
6692 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6693 "invalid entity type found\n", NULL);
6694 }
6695 if (ret == XML_ERR_ENTITY_LOOP) {
6696 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6697 return;
6698 }
6699 }
6700 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6701 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6702 /*
6703 * Entity reference callback comes second, it's somewhat
6704 * superfluous but a compatibility to historical behaviour
6705 */
6706 ctxt->sax->reference(ctxt->userData, ent->name);
6707 }
6708 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006709 }
6710 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006711 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006712 /*
6713 * Create a node.
6714 */
6715 ctxt->sax->reference(ctxt->userData, ent->name);
6716 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006717 }
6718 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006719 /*
6720 * There is a problem on the handling of _private for entities
6721 * (bug 155816): Should we copy the content of the field from
6722 * the entity (possibly overwriting some value set by the user
6723 * when a copy is created), should we leave it alone, or should
6724 * we try to take care of different situations? The problem
6725 * is exacerbated by the usage of this field by the xmlReader.
6726 * To fix this bug, we look at _private on the created node
6727 * and, if it's NULL, we copy in whatever was in the entity.
6728 * If it's not NULL we leave it alone. This is somewhat of a
6729 * hack - maybe we should have further tests to determine
6730 * what to do.
6731 */
Owen Taylor3473f882001-02-23 17:55:21 +00006732 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6733 /*
6734 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006735 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006736 * In the first occurrence list contains the replacement.
6737 * progressive == 2 means we are operating on the Reader
6738 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006739 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006740 if (((list == NULL) && (ent->owner == 0)) ||
6741 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006742 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006743
6744 /*
6745 * when operating on a reader, the entities definitions
6746 * are always owning the entities subtree.
6747 if (ctxt->parseMode == XML_PARSE_READER)
6748 ent->owner = 1;
6749 */
6750
Daniel Veillard62f313b2001-07-04 19:49:14 +00006751 cur = ent->children;
6752 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006753 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006754 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006755 if (nw->_private == NULL)
6756 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006757 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006758 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006759 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006760 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006761 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006762 if (cur == ent->last) {
6763 /*
6764 * needed to detect some strange empty
6765 * node cases in the reader tests
6766 */
6767 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006768 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006769 (nw->type == XML_ELEMENT_NODE) &&
6770 (nw->children == NULL))
6771 nw->extra = 1;
6772
Daniel Veillard62f313b2001-07-04 19:49:14 +00006773 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006774 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006775 cur = cur->next;
6776 }
Daniel Veillard81273902003-09-30 00:43:48 +00006777#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006778 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006779 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006780#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006781 } else if (list == NULL) {
6782 xmlNodePtr nw = NULL, cur, next, last,
6783 firstChild = NULL;
6784 /*
6785 * Copy the entity child list and make it the new
6786 * entity child list. The goal is to make sure any
6787 * ID or REF referenced will be the one from the
6788 * document content and not the entity copy.
6789 */
6790 cur = ent->children;
6791 ent->children = NULL;
6792 last = ent->last;
6793 ent->last = NULL;
6794 while (cur != NULL) {
6795 next = cur->next;
6796 cur->next = NULL;
6797 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006798 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006799 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006800 if (nw->_private == NULL)
6801 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006802 if (firstChild == NULL){
6803 firstChild = cur;
6804 }
6805 xmlAddChild((xmlNodePtr) ent, nw);
6806 xmlAddChild(ctxt->node, cur);
6807 }
6808 if (cur == last)
6809 break;
6810 cur = next;
6811 }
6812 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006813#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006814 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6815 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006816#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006817 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006818 const xmlChar *nbktext;
6819
Daniel Veillard62f313b2001-07-04 19:49:14 +00006820 /*
6821 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006822 * node with a possible previous text one which
6823 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006824 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006825 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6826 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006827 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006828 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006829 if ((ent->last != ent->children) &&
6830 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006831 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006832 xmlAddChildList(ctxt->node, ent->children);
6833 }
6834
Owen Taylor3473f882001-02-23 17:55:21 +00006835 /*
6836 * This is to avoid a nasty side effect, see
6837 * characters() in SAX.c
6838 */
6839 ctxt->nodemem = 0;
6840 ctxt->nodelen = 0;
6841 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006842 }
6843 }
6844 } else {
6845 val = ent->content;
6846 if (val == NULL) return;
6847 /*
6848 * inline the entity.
6849 */
6850 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6851 (!ctxt->disableSAX))
6852 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6853 }
6854 }
6855}
6856
6857/**
6858 * xmlParseEntityRef:
6859 * @ctxt: an XML parser context
6860 *
6861 * parse ENTITY references declarations
6862 *
6863 * [68] EntityRef ::= '&' Name ';'
6864 *
6865 * [ WFC: Entity Declared ]
6866 * In a document without any DTD, a document with only an internal DTD
6867 * subset which contains no parameter entity references, or a document
6868 * with "standalone='yes'", the Name given in the entity reference
6869 * must match that in an entity declaration, except that well-formed
6870 * documents need not declare any of the following entities: amp, lt,
6871 * gt, apos, quot. The declaration of a parameter entity must precede
6872 * any reference to it. Similarly, the declaration of a general entity
6873 * must precede any reference to it which appears in a default value in an
6874 * attribute-list declaration. Note that if entities are declared in the
6875 * external subset or in external parameter entities, a non-validating
6876 * processor is not obligated to read and process their declarations;
6877 * for such documents, the rule that an entity must be declared is a
6878 * well-formedness constraint only if standalone='yes'.
6879 *
6880 * [ WFC: Parsed Entity ]
6881 * An entity reference must not contain the name of an unparsed entity
6882 *
6883 * Returns the xmlEntityPtr if found, or NULL otherwise.
6884 */
6885xmlEntityPtr
6886xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006887 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006888 xmlEntityPtr ent = NULL;
6889
6890 GROW;
6891
6892 if (RAW == '&') {
6893 NEXT;
6894 name = xmlParseName(ctxt);
6895 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006896 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6897 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006898 } else {
6899 if (RAW == ';') {
6900 NEXT;
6901 /*
6902 * Ask first SAX for entity resolution, otherwise try the
6903 * predefined set.
6904 */
6905 if (ctxt->sax != NULL) {
6906 if (ctxt->sax->getEntity != NULL)
6907 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006908 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006909 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006910 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6911 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006912 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006913 }
Owen Taylor3473f882001-02-23 17:55:21 +00006914 }
6915 /*
6916 * [ WFC: Entity Declared ]
6917 * In a document without any DTD, a document with only an
6918 * internal DTD subset which contains no parameter entity
6919 * references, or a document with "standalone='yes'", the
6920 * Name given in the entity reference must match that in an
6921 * entity declaration, except that well-formed documents
6922 * need not declare any of the following entities: amp, lt,
6923 * gt, apos, quot.
6924 * The declaration of a parameter entity must precede any
6925 * reference to it.
6926 * Similarly, the declaration of a general entity must
6927 * precede any reference to it which appears in a default
6928 * value in an attribute-list declaration. Note that if
6929 * entities are declared in the external subset or in
6930 * external parameter entities, a non-validating processor
6931 * is not obligated to read and process their declarations;
6932 * for such documents, the rule that an entity must be
6933 * declared is a well-formedness constraint only if
6934 * standalone='yes'.
6935 */
6936 if (ent == NULL) {
6937 if ((ctxt->standalone == 1) ||
6938 ((ctxt->hasExternalSubset == 0) &&
6939 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006940 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006941 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006942 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006943 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006944 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006945 if ((ctxt->inSubset == 0) &&
6946 (ctxt->sax != NULL) &&
6947 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006948 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006949 }
Owen Taylor3473f882001-02-23 17:55:21 +00006950 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006951 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006952 }
6953
6954 /*
6955 * [ WFC: Parsed Entity ]
6956 * An entity reference must not contain the name of an
6957 * unparsed entity
6958 */
6959 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006960 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006961 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006962 }
6963
6964 /*
6965 * [ WFC: No External Entity References ]
6966 * Attribute values cannot contain direct or indirect
6967 * entity references to external entities.
6968 */
6969 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6970 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006971 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6972 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 }
6974 /*
6975 * [ WFC: No < in Attribute Values ]
6976 * The replacement text of any entity referred to directly or
6977 * indirectly in an attribute value (other than "&lt;") must
6978 * not contain a <.
6979 */
6980 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6981 (ent != NULL) &&
6982 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6983 (ent->content != NULL) &&
6984 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006985 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006986 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006987 }
6988
6989 /*
6990 * Internal check, no parameter entities here ...
6991 */
6992 else {
6993 switch (ent->etype) {
6994 case XML_INTERNAL_PARAMETER_ENTITY:
6995 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006996 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6997 "Attempt to reference the parameter entity '%s'\n",
6998 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006999 break;
7000 default:
7001 break;
7002 }
7003 }
7004
7005 /*
7006 * [ WFC: No Recursion ]
7007 * A parsed entity must not contain a recursive reference
7008 * to itself, either directly or indirectly.
7009 * Done somewhere else
7010 */
7011
7012 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007013 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007014 }
Owen Taylor3473f882001-02-23 17:55:21 +00007015 }
7016 }
7017 return(ent);
7018}
7019
7020/**
7021 * xmlParseStringEntityRef:
7022 * @ctxt: an XML parser context
7023 * @str: a pointer to an index in the string
7024 *
7025 * parse ENTITY references declarations, but this version parses it from
7026 * a string value.
7027 *
7028 * [68] EntityRef ::= '&' Name ';'
7029 *
7030 * [ WFC: Entity Declared ]
7031 * In a document without any DTD, a document with only an internal DTD
7032 * subset which contains no parameter entity references, or a document
7033 * with "standalone='yes'", the Name given in the entity reference
7034 * must match that in an entity declaration, except that well-formed
7035 * documents need not declare any of the following entities: amp, lt,
7036 * gt, apos, quot. The declaration of a parameter entity must precede
7037 * any reference to it. Similarly, the declaration of a general entity
7038 * must precede any reference to it which appears in a default value in an
7039 * attribute-list declaration. Note that if entities are declared in the
7040 * external subset or in external parameter entities, a non-validating
7041 * processor is not obligated to read and process their declarations;
7042 * for such documents, the rule that an entity must be declared is a
7043 * well-formedness constraint only if standalone='yes'.
7044 *
7045 * [ WFC: Parsed Entity ]
7046 * An entity reference must not contain the name of an unparsed entity
7047 *
7048 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7049 * is updated to the current location in the string.
7050 */
7051xmlEntityPtr
7052xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7053 xmlChar *name;
7054 const xmlChar *ptr;
7055 xmlChar cur;
7056 xmlEntityPtr ent = NULL;
7057
7058 if ((str == NULL) || (*str == NULL))
7059 return(NULL);
7060 ptr = *str;
7061 cur = *ptr;
7062 if (cur == '&') {
7063 ptr++;
7064 cur = *ptr;
7065 name = xmlParseStringName(ctxt, &ptr);
7066 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007067 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7068 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007069 } else {
7070 if (*ptr == ';') {
7071 ptr++;
7072 /*
7073 * Ask first SAX for entity resolution, otherwise try the
7074 * predefined set.
7075 */
7076 if (ctxt->sax != NULL) {
7077 if (ctxt->sax->getEntity != NULL)
7078 ent = ctxt->sax->getEntity(ctxt->userData, name);
7079 if (ent == NULL)
7080 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007081 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00007082 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007083 }
Owen Taylor3473f882001-02-23 17:55:21 +00007084 }
7085 /*
7086 * [ WFC: Entity Declared ]
7087 * In a document without any DTD, a document with only an
7088 * internal DTD subset which contains no parameter entity
7089 * references, or a document with "standalone='yes'", the
7090 * Name given in the entity reference must match that in an
7091 * entity declaration, except that well-formed documents
7092 * need not declare any of the following entities: amp, lt,
7093 * gt, apos, quot.
7094 * The declaration of a parameter entity must precede any
7095 * reference to it.
7096 * Similarly, the declaration of a general entity must
7097 * precede any reference to it which appears in a default
7098 * value in an attribute-list declaration. Note that if
7099 * entities are declared in the external subset or in
7100 * external parameter entities, a non-validating processor
7101 * is not obligated to read and process their declarations;
7102 * for such documents, the rule that an entity must be
7103 * declared is a well-formedness constraint only if
7104 * standalone='yes'.
7105 */
7106 if (ent == NULL) {
7107 if ((ctxt->standalone == 1) ||
7108 ((ctxt->hasExternalSubset == 0) &&
7109 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007110 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007111 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007112 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007113 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00007114 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007115 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007116 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00007117 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00007118 }
7119
7120 /*
7121 * [ WFC: Parsed Entity ]
7122 * An entity reference must not contain the name of an
7123 * unparsed entity
7124 */
7125 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007126 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007127 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 }
7129
7130 /*
7131 * [ WFC: No External Entity References ]
7132 * Attribute values cannot contain direct or indirect
7133 * entity references to external entities.
7134 */
7135 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7136 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007137 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00007138 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007139 }
7140 /*
7141 * [ WFC: No < in Attribute Values ]
7142 * The replacement text of any entity referred to directly or
7143 * indirectly in an attribute value (other than "&lt;") must
7144 * not contain a <.
7145 */
7146 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7147 (ent != NULL) &&
7148 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7149 (ent->content != NULL) &&
7150 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007151 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7152 "'<' in entity '%s' is not allowed in attributes values\n",
7153 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007154 }
7155
7156 /*
7157 * Internal check, no parameter entities here ...
7158 */
7159 else {
7160 switch (ent->etype) {
7161 case XML_INTERNAL_PARAMETER_ENTITY:
7162 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00007163 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7164 "Attempt to reference the parameter entity '%s'\n",
7165 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007166 break;
7167 default:
7168 break;
7169 }
7170 }
7171
7172 /*
7173 * [ WFC: No Recursion ]
7174 * A parsed entity must not contain a recursive reference
7175 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007176 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00007177 */
7178
7179 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007180 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007181 }
7182 xmlFree(name);
7183 }
7184 }
7185 *str = ptr;
7186 return(ent);
7187}
7188
7189/**
7190 * xmlParsePEReference:
7191 * @ctxt: an XML parser context
7192 *
7193 * parse PEReference declarations
7194 * The entity content is handled directly by pushing it's content as
7195 * a new input stream.
7196 *
7197 * [69] PEReference ::= '%' Name ';'
7198 *
7199 * [ WFC: No Recursion ]
7200 * A parsed entity must not contain a recursive
7201 * reference to itself, either directly or indirectly.
7202 *
7203 * [ WFC: Entity Declared ]
7204 * In a document without any DTD, a document with only an internal DTD
7205 * subset which contains no parameter entity references, or a document
7206 * with "standalone='yes'", ... ... The declaration of a parameter
7207 * entity must precede any reference to it...
7208 *
7209 * [ VC: Entity Declared ]
7210 * In a document with an external subset or external parameter entities
7211 * with "standalone='no'", ... ... The declaration of a parameter entity
7212 * must precede any reference to it...
7213 *
7214 * [ WFC: In DTD ]
7215 * Parameter-entity references may only appear in the DTD.
7216 * NOTE: misleading but this is handled.
7217 */
7218void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007219xmlParsePEReference(xmlParserCtxtPtr ctxt)
7220{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007221 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007222 xmlEntityPtr entity = NULL;
7223 xmlParserInputPtr input;
7224
7225 if (RAW == '%') {
7226 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00007227 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00007228 if (name == NULL) {
7229 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7230 "xmlParsePEReference: no name\n");
7231 } else {
7232 if (RAW == ';') {
7233 NEXT;
7234 if ((ctxt->sax != NULL) &&
7235 (ctxt->sax->getParameterEntity != NULL))
7236 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7237 name);
7238 if (entity == NULL) {
7239 /*
7240 * [ WFC: Entity Declared ]
7241 * In a document without any DTD, a document with only an
7242 * internal DTD subset which contains no parameter entity
7243 * references, or a document with "standalone='yes'", ...
7244 * ... The declaration of a parameter entity must precede
7245 * any reference to it...
7246 */
7247 if ((ctxt->standalone == 1) ||
7248 ((ctxt->hasExternalSubset == 0) &&
7249 (ctxt->hasPErefs == 0))) {
7250 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7251 "PEReference: %%%s; not found\n",
7252 name);
7253 } else {
7254 /*
7255 * [ VC: Entity Declared ]
7256 * In a document with an external subset or external
7257 * parameter entities with "standalone='no'", ...
7258 * ... The declaration of a parameter entity must
7259 * precede any reference to it...
7260 */
7261 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7262 "PEReference: %%%s; not found\n",
7263 name, NULL);
7264 ctxt->valid = 0;
7265 }
7266 } else {
7267 /*
7268 * Internal checking in case the entity quest barfed
7269 */
7270 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7271 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7272 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7273 "Internal: %%%s; is not a parameter entity\n",
7274 name, NULL);
7275 } else if (ctxt->input->free != deallocblankswrapper) {
7276 input =
7277 xmlNewBlanksWrapperInputStream(ctxt, entity);
7278 xmlPushInput(ctxt, input);
7279 } else {
7280 /*
7281 * TODO !!!
7282 * handle the extra spaces added before and after
7283 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7284 */
7285 input = xmlNewEntityInputStream(ctxt, entity);
7286 xmlPushInput(ctxt, input);
7287 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00007288 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00007289 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00007290 xmlParseTextDecl(ctxt);
7291 if (ctxt->errNo ==
7292 XML_ERR_UNSUPPORTED_ENCODING) {
7293 /*
7294 * The XML REC instructs us to stop parsing
7295 * right here
7296 */
7297 ctxt->instate = XML_PARSER_EOF;
7298 return;
7299 }
7300 }
7301 }
7302 }
7303 ctxt->hasPErefs = 1;
7304 } else {
7305 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7306 }
7307 }
Owen Taylor3473f882001-02-23 17:55:21 +00007308 }
7309}
7310
7311/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007312 * xmlLoadEntityContent:
7313 * @ctxt: an XML parser context
7314 * @entity: an unloaded system entity
7315 *
7316 * Load the original content of the given system entity from the
7317 * ExternalID/SystemID given. This is to be used for Included in Literal
7318 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7319 *
7320 * Returns 0 in case of success and -1 in case of failure
7321 */
7322static int
7323xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7324 xmlParserInputPtr input;
7325 xmlBufferPtr buf;
7326 int l, c;
7327 int count = 0;
7328
7329 if ((ctxt == NULL) || (entity == NULL) ||
7330 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7331 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7332 (entity->content != NULL)) {
7333 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7334 "xmlLoadEntityContent parameter error");
7335 return(-1);
7336 }
7337
7338 if (xmlParserDebugEntities)
7339 xmlGenericError(xmlGenericErrorContext,
7340 "Reading %s entity content input\n", entity->name);
7341
7342 buf = xmlBufferCreate();
7343 if (buf == NULL) {
7344 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7345 "xmlLoadEntityContent parameter error");
7346 return(-1);
7347 }
7348
7349 input = xmlNewEntityInputStream(ctxt, entity);
7350 if (input == NULL) {
7351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7352 "xmlLoadEntityContent input error");
7353 xmlBufferFree(buf);
7354 return(-1);
7355 }
7356
7357 /*
7358 * Push the entity as the current input, read char by char
7359 * saving to the buffer until the end of the entity or an error
7360 */
7361 xmlPushInput(ctxt, input);
7362 GROW;
7363 c = CUR_CHAR(l);
7364 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7365 (IS_CHAR(c))) {
7366 xmlBufferAdd(buf, ctxt->input->cur, l);
7367 if (count++ > 100) {
7368 count = 0;
7369 GROW;
7370 }
7371 NEXTL(l);
7372 c = CUR_CHAR(l);
7373 }
7374
7375 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7376 xmlPopInput(ctxt);
7377 } else if (!IS_CHAR(c)) {
7378 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7379 "xmlLoadEntityContent: invalid char value %d\n",
7380 c);
7381 xmlBufferFree(buf);
7382 return(-1);
7383 }
7384 entity->content = buf->content;
7385 buf->content = NULL;
7386 xmlBufferFree(buf);
7387
7388 return(0);
7389}
7390
7391/**
Owen Taylor3473f882001-02-23 17:55:21 +00007392 * xmlParseStringPEReference:
7393 * @ctxt: an XML parser context
7394 * @str: a pointer to an index in the string
7395 *
7396 * parse PEReference declarations
7397 *
7398 * [69] PEReference ::= '%' Name ';'
7399 *
7400 * [ WFC: No Recursion ]
7401 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007402 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007403 *
7404 * [ WFC: Entity Declared ]
7405 * In a document without any DTD, a document with only an internal DTD
7406 * subset which contains no parameter entity references, or a document
7407 * with "standalone='yes'", ... ... The declaration of a parameter
7408 * entity must precede any reference to it...
7409 *
7410 * [ VC: Entity Declared ]
7411 * In a document with an external subset or external parameter entities
7412 * with "standalone='no'", ... ... The declaration of a parameter entity
7413 * must precede any reference to it...
7414 *
7415 * [ WFC: In DTD ]
7416 * Parameter-entity references may only appear in the DTD.
7417 * NOTE: misleading but this is handled.
7418 *
7419 * Returns the string of the entity content.
7420 * str is updated to the current value of the index
7421 */
7422xmlEntityPtr
7423xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7424 const xmlChar *ptr;
7425 xmlChar cur;
7426 xmlChar *name;
7427 xmlEntityPtr entity = NULL;
7428
7429 if ((str == NULL) || (*str == NULL)) return(NULL);
7430 ptr = *str;
7431 cur = *ptr;
7432 if (cur == '%') {
7433 ptr++;
7434 cur = *ptr;
7435 name = xmlParseStringName(ctxt, &ptr);
7436 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007437 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7438 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007439 } else {
7440 cur = *ptr;
7441 if (cur == ';') {
7442 ptr++;
7443 cur = *ptr;
7444 if ((ctxt->sax != NULL) &&
7445 (ctxt->sax->getParameterEntity != NULL))
7446 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7447 name);
7448 if (entity == NULL) {
7449 /*
7450 * [ WFC: Entity Declared ]
7451 * In a document without any DTD, a document with only an
7452 * internal DTD subset which contains no parameter entity
7453 * references, or a document with "standalone='yes'", ...
7454 * ... The declaration of a parameter entity must precede
7455 * any reference to it...
7456 */
7457 if ((ctxt->standalone == 1) ||
7458 ((ctxt->hasExternalSubset == 0) &&
7459 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007460 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007461 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007462 } else {
7463 /*
7464 * [ VC: Entity Declared ]
7465 * In a document with an external subset or external
7466 * parameter entities with "standalone='no'", ...
7467 * ... The declaration of a parameter entity must
7468 * precede any reference to it...
7469 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007470 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7471 "PEReference: %%%s; not found\n",
7472 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007473 ctxt->valid = 0;
7474 }
7475 } else {
7476 /*
7477 * Internal checking in case the entity quest barfed
7478 */
7479 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7480 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007481 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7482 "%%%s; is not a parameter entity\n",
7483 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007484 }
7485 }
7486 ctxt->hasPErefs = 1;
7487 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007488 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007489 }
7490 xmlFree(name);
7491 }
7492 }
7493 *str = ptr;
7494 return(entity);
7495}
7496
7497/**
7498 * xmlParseDocTypeDecl:
7499 * @ctxt: an XML parser context
7500 *
7501 * parse a DOCTYPE declaration
7502 *
7503 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7504 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7505 *
7506 * [ VC: Root Element Type ]
7507 * The Name in the document type declaration must match the element
7508 * type of the root element.
7509 */
7510
7511void
7512xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007513 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007514 xmlChar *ExternalID = NULL;
7515 xmlChar *URI = NULL;
7516
7517 /*
7518 * We know that '<!DOCTYPE' has been detected.
7519 */
7520 SKIP(9);
7521
7522 SKIP_BLANKS;
7523
7524 /*
7525 * Parse the DOCTYPE name.
7526 */
7527 name = xmlParseName(ctxt);
7528 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007529 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7530 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007531 }
7532 ctxt->intSubName = name;
7533
7534 SKIP_BLANKS;
7535
7536 /*
7537 * Check for SystemID and ExternalID
7538 */
7539 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7540
7541 if ((URI != NULL) || (ExternalID != NULL)) {
7542 ctxt->hasExternalSubset = 1;
7543 }
7544 ctxt->extSubURI = URI;
7545 ctxt->extSubSystem = ExternalID;
7546
7547 SKIP_BLANKS;
7548
7549 /*
7550 * Create and update the internal subset.
7551 */
7552 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7553 (!ctxt->disableSAX))
7554 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7555
7556 /*
7557 * Is there any internal subset declarations ?
7558 * they are handled separately in xmlParseInternalSubset()
7559 */
7560 if (RAW == '[')
7561 return;
7562
7563 /*
7564 * We should be at the end of the DOCTYPE declaration.
7565 */
7566 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007567 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007568 }
7569 NEXT;
7570}
7571
7572/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007573 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007574 * @ctxt: an XML parser context
7575 *
7576 * parse the internal subset declaration
7577 *
7578 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7579 */
7580
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007581static void
Owen Taylor3473f882001-02-23 17:55:21 +00007582xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7583 /*
7584 * Is there any DTD definition ?
7585 */
7586 if (RAW == '[') {
7587 ctxt->instate = XML_PARSER_DTD;
7588 NEXT;
7589 /*
7590 * Parse the succession of Markup declarations and
7591 * PEReferences.
7592 * Subsequence (markupdecl | PEReference | S)*
7593 */
7594 while (RAW != ']') {
7595 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007596 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007597
7598 SKIP_BLANKS;
7599 xmlParseMarkupDecl(ctxt);
7600 xmlParsePEReference(ctxt);
7601
7602 /*
7603 * Pop-up of finished entities.
7604 */
7605 while ((RAW == 0) && (ctxt->inputNr > 1))
7606 xmlPopInput(ctxt);
7607
7608 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007609 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007610 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007611 break;
7612 }
7613 }
7614 if (RAW == ']') {
7615 NEXT;
7616 SKIP_BLANKS;
7617 }
7618 }
7619
7620 /*
7621 * We should be at the end of the DOCTYPE declaration.
7622 */
7623 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007624 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007625 }
7626 NEXT;
7627}
7628
Daniel Veillard81273902003-09-30 00:43:48 +00007629#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007630/**
7631 * xmlParseAttribute:
7632 * @ctxt: an XML parser context
7633 * @value: a xmlChar ** used to store the value of the attribute
7634 *
7635 * parse an attribute
7636 *
7637 * [41] Attribute ::= Name Eq AttValue
7638 *
7639 * [ WFC: No External Entity References ]
7640 * Attribute values cannot contain direct or indirect entity references
7641 * to external entities.
7642 *
7643 * [ WFC: No < in Attribute Values ]
7644 * The replacement text of any entity referred to directly or indirectly in
7645 * an attribute value (other than "&lt;") must not contain a <.
7646 *
7647 * [ VC: Attribute Value Type ]
7648 * The attribute must have been declared; the value must be of the type
7649 * declared for it.
7650 *
7651 * [25] Eq ::= S? '=' S?
7652 *
7653 * With namespace:
7654 *
7655 * [NS 11] Attribute ::= QName Eq AttValue
7656 *
7657 * Also the case QName == xmlns:??? is handled independently as a namespace
7658 * definition.
7659 *
7660 * Returns the attribute name, and the value in *value.
7661 */
7662
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007663const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007664xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007665 const xmlChar *name;
7666 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007667
7668 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007669 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007670 name = xmlParseName(ctxt);
7671 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007672 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007673 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007674 return(NULL);
7675 }
7676
7677 /*
7678 * read the value
7679 */
7680 SKIP_BLANKS;
7681 if (RAW == '=') {
7682 NEXT;
7683 SKIP_BLANKS;
7684 val = xmlParseAttValue(ctxt);
7685 ctxt->instate = XML_PARSER_CONTENT;
7686 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007687 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007688 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007689 return(NULL);
7690 }
7691
7692 /*
7693 * Check that xml:lang conforms to the specification
7694 * No more registered as an error, just generate a warning now
7695 * since this was deprecated in XML second edition
7696 */
7697 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7698 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007699 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7700 "Malformed value for xml:lang : %s\n",
7701 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007702 }
7703 }
7704
7705 /*
7706 * Check that xml:space conforms to the specification
7707 */
7708 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7709 if (xmlStrEqual(val, BAD_CAST "default"))
7710 *(ctxt->space) = 0;
7711 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7712 *(ctxt->space) = 1;
7713 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007714 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007715"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007716 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007717 }
7718 }
7719
7720 *value = val;
7721 return(name);
7722}
7723
7724/**
7725 * xmlParseStartTag:
7726 * @ctxt: an XML parser context
7727 *
7728 * parse a start of tag either for rule element or
7729 * EmptyElement. In both case we don't parse the tag closing chars.
7730 *
7731 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7732 *
7733 * [ WFC: Unique Att Spec ]
7734 * No attribute name may appear more than once in the same start-tag or
7735 * empty-element tag.
7736 *
7737 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7738 *
7739 * [ WFC: Unique Att Spec ]
7740 * No attribute name may appear more than once in the same start-tag or
7741 * empty-element tag.
7742 *
7743 * With namespace:
7744 *
7745 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7746 *
7747 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7748 *
7749 * Returns the element name parsed
7750 */
7751
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007752const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007753xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007754 const xmlChar *name;
7755 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007756 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007757 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007758 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007759 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007760 int i;
7761
7762 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007763 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007764
7765 name = xmlParseName(ctxt);
7766 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007767 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007768 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007769 return(NULL);
7770 }
7771
7772 /*
7773 * Now parse the attributes, it ends up with the ending
7774 *
7775 * (S Attribute)* S?
7776 */
7777 SKIP_BLANKS;
7778 GROW;
7779
Daniel Veillard21a0f912001-02-25 19:54:14 +00007780 while ((RAW != '>') &&
7781 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007782 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007783 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007784 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007785
7786 attname = xmlParseAttribute(ctxt, &attvalue);
7787 if ((attname != NULL) && (attvalue != NULL)) {
7788 /*
7789 * [ WFC: Unique Att Spec ]
7790 * No attribute name may appear more than once in the same
7791 * start-tag or empty-element tag.
7792 */
7793 for (i = 0; i < nbatts;i += 2) {
7794 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007795 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007796 xmlFree(attvalue);
7797 goto failed;
7798 }
7799 }
Owen Taylor3473f882001-02-23 17:55:21 +00007800 /*
7801 * Add the pair to atts
7802 */
7803 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007804 maxatts = 22; /* allow for 10 attrs by default */
7805 atts = (const xmlChar **)
7806 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007807 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007808 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007809 if (attvalue != NULL)
7810 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007811 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007812 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007813 ctxt->atts = atts;
7814 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007815 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007816 const xmlChar **n;
7817
Owen Taylor3473f882001-02-23 17:55:21 +00007818 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007819 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007820 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007821 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007822 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007823 if (attvalue != NULL)
7824 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007825 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007826 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007827 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007828 ctxt->atts = atts;
7829 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007830 }
7831 atts[nbatts++] = attname;
7832 atts[nbatts++] = attvalue;
7833 atts[nbatts] = NULL;
7834 atts[nbatts + 1] = NULL;
7835 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007836 if (attvalue != NULL)
7837 xmlFree(attvalue);
7838 }
7839
7840failed:
7841
Daniel Veillard3772de32002-12-17 10:31:45 +00007842 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007843 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7844 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007845 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007846 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7847 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007848 }
7849 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007850 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7851 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007852 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7853 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007854 break;
7855 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007856 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007857 GROW;
7858 }
7859
7860 /*
7861 * SAX: Start of Element !
7862 */
7863 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007864 (!ctxt->disableSAX)) {
7865 if (nbatts > 0)
7866 ctxt->sax->startElement(ctxt->userData, name, atts);
7867 else
7868 ctxt->sax->startElement(ctxt->userData, name, NULL);
7869 }
Owen Taylor3473f882001-02-23 17:55:21 +00007870
7871 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007872 /* Free only the content strings */
7873 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007874 if (atts[i] != NULL)
7875 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007876 }
7877 return(name);
7878}
7879
7880/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007881 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007882 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007883 * @line: line of the start tag
7884 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007885 *
7886 * parse an end of tag
7887 *
7888 * [42] ETag ::= '</' Name S? '>'
7889 *
7890 * With namespace
7891 *
7892 * [NS 9] ETag ::= '</' QName S? '>'
7893 */
7894
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007895static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007896xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007897 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007898
7899 GROW;
7900 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007901 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007902 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007903 return;
7904 }
7905 SKIP(2);
7906
Daniel Veillard46de64e2002-05-29 08:21:33 +00007907 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007908
7909 /*
7910 * We should definitely be at the ending "S? '>'" part
7911 */
7912 GROW;
7913 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007914 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007915 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007916 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007917 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007918
7919 /*
7920 * [ WFC: Element Type Match ]
7921 * The Name in an element's end-tag must match the element type in the
7922 * start-tag.
7923 *
7924 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007925 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007926 if (name == NULL) name = BAD_CAST "unparseable";
7927 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007928 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007929 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007930 }
7931
7932 /*
7933 * SAX: End of Tag
7934 */
7935 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7936 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007937 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007938
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007940 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007941 return;
7942}
7943
7944/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007945 * xmlParseEndTag:
7946 * @ctxt: an XML parser context
7947 *
7948 * parse an end of tag
7949 *
7950 * [42] ETag ::= '</' Name S? '>'
7951 *
7952 * With namespace
7953 *
7954 * [NS 9] ETag ::= '</' QName S? '>'
7955 */
7956
7957void
7958xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007959 xmlParseEndTag1(ctxt, 0);
7960}
Daniel Veillard81273902003-09-30 00:43:48 +00007961#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962
7963/************************************************************************
7964 * *
7965 * SAX 2 specific operations *
7966 * *
7967 ************************************************************************/
7968
Daniel Veillard0fb18932003-09-07 09:14:37 +00007969/*
7970 * xmlGetNamespace:
7971 * @ctxt: an XML parser context
7972 * @prefix: the prefix to lookup
7973 *
7974 * Lookup the namespace name for the @prefix (which ca be NULL)
7975 * The prefix must come from the @ctxt->dict dictionnary
7976 *
7977 * Returns the namespace name or NULL if not bound
7978 */
7979static const xmlChar *
7980xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7981 int i;
7982
Daniel Veillarde57ec792003-09-10 10:50:59 +00007983 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007985 if (ctxt->nsTab[i] == prefix) {
7986 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7987 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007988 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007989 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990 return(NULL);
7991}
7992
7993/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007994 * xmlParseQName:
7995 * @ctxt: an XML parser context
7996 * @prefix: pointer to store the prefix part
7997 *
7998 * parse an XML Namespace QName
7999 *
8000 * [6] QName ::= (Prefix ':')? LocalPart
8001 * [7] Prefix ::= NCName
8002 * [8] LocalPart ::= NCName
8003 *
8004 * Returns the Name parsed or NULL
8005 */
8006
8007static const xmlChar *
8008xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8009 const xmlChar *l, *p;
8010
8011 GROW;
8012
8013 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008014 if (l == NULL) {
8015 if (CUR == ':') {
8016 l = xmlParseName(ctxt);
8017 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008018 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8019 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008020 *prefix = NULL;
8021 return(l);
8022 }
8023 }
8024 return(NULL);
8025 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008026 if (CUR == ':') {
8027 NEXT;
8028 p = l;
8029 l = xmlParseNCName(ctxt);
8030 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008031 xmlChar *tmp;
8032
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008033 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8034 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008035 l = xmlParseNmtoken(ctxt);
8036 if (l == NULL)
8037 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8038 else {
8039 tmp = xmlBuildQName(l, p, NULL, 0);
8040 xmlFree((char *)l);
8041 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008042 p = xmlDictLookup(ctxt->dict, tmp, -1);
8043 if (tmp != NULL) xmlFree(tmp);
8044 *prefix = NULL;
8045 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008046 }
8047 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008048 xmlChar *tmp;
8049
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008050 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8051 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008052 NEXT;
8053 tmp = (xmlChar *) xmlParseName(ctxt);
8054 if (tmp != NULL) {
8055 tmp = xmlBuildQName(tmp, l, NULL, 0);
8056 l = xmlDictLookup(ctxt->dict, tmp, -1);
8057 if (tmp != NULL) xmlFree(tmp);
8058 *prefix = p;
8059 return(l);
8060 }
8061 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8062 l = xmlDictLookup(ctxt->dict, tmp, -1);
8063 if (tmp != NULL) xmlFree(tmp);
8064 *prefix = p;
8065 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008066 }
8067 *prefix = p;
8068 } else
8069 *prefix = NULL;
8070 return(l);
8071}
8072
8073/**
8074 * xmlParseQNameAndCompare:
8075 * @ctxt: an XML parser context
8076 * @name: the localname
8077 * @prefix: the prefix, if any.
8078 *
8079 * parse an XML name and compares for match
8080 * (specialized for endtag parsing)
8081 *
8082 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8083 * and the name for mismatch
8084 */
8085
8086static const xmlChar *
8087xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8088 xmlChar const *prefix) {
8089 const xmlChar *cmp = name;
8090 const xmlChar *in;
8091 const xmlChar *ret;
8092 const xmlChar *prefix2;
8093
8094 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8095
8096 GROW;
8097 in = ctxt->input->cur;
8098
8099 cmp = prefix;
8100 while (*in != 0 && *in == *cmp) {
8101 ++in;
8102 ++cmp;
8103 }
8104 if ((*cmp == 0) && (*in == ':')) {
8105 in++;
8106 cmp = name;
8107 while (*in != 0 && *in == *cmp) {
8108 ++in;
8109 ++cmp;
8110 }
William M. Brack76e95df2003-10-18 16:20:14 +00008111 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008112 /* success */
8113 ctxt->input->cur = in;
8114 return((const xmlChar*) 1);
8115 }
8116 }
8117 /*
8118 * all strings coms from the dictionary, equality can be done directly
8119 */
8120 ret = xmlParseQName (ctxt, &prefix2);
8121 if ((ret == name) && (prefix == prefix2))
8122 return((const xmlChar*) 1);
8123 return ret;
8124}
8125
8126/**
8127 * xmlParseAttValueInternal:
8128 * @ctxt: an XML parser context
8129 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008130 * @alloc: whether the attribute was reallocated as a new string
8131 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008132 *
8133 * parse a value for an attribute.
8134 * NOTE: if no normalization is needed, the routine will return pointers
8135 * directly from the data buffer.
8136 *
8137 * 3.3.3 Attribute-Value Normalization:
8138 * Before the value of an attribute is passed to the application or
8139 * checked for validity, the XML processor must normalize it as follows:
8140 * - a character reference is processed by appending the referenced
8141 * character to the attribute value
8142 * - an entity reference is processed by recursively processing the
8143 * replacement text of the entity
8144 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8145 * appending #x20 to the normalized value, except that only a single
8146 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8147 * parsed entity or the literal entity value of an internal parsed entity
8148 * - other characters are processed by appending them to the normalized value
8149 * If the declared value is not CDATA, then the XML processor must further
8150 * process the normalized attribute value by discarding any leading and
8151 * trailing space (#x20) characters, and by replacing sequences of space
8152 * (#x20) characters by a single space (#x20) character.
8153 * All attributes for which no declaration has been read should be treated
8154 * by a non-validating parser as if declared CDATA.
8155 *
8156 * Returns the AttValue parsed or NULL. The value has to be freed by the
8157 * caller if it was copied, this can be detected by val[*len] == 0.
8158 */
8159
8160static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008161xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8162 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008163{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008164 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008165 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008166 xmlChar *ret = NULL;
8167
8168 GROW;
8169 in = (xmlChar *) CUR_PTR;
8170 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008171 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008172 return (NULL);
8173 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008174 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008175
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008176 /*
8177 * try to handle in this routine the most common case where no
8178 * allocation of a new string is required and where content is
8179 * pure ASCII.
8180 */
8181 limit = *in++;
8182 end = ctxt->input->end;
8183 start = in;
8184 if (in >= end) {
8185 const xmlChar *oldbase = ctxt->input->base;
8186 GROW;
8187 if (oldbase != ctxt->input->base) {
8188 long delta = ctxt->input->base - oldbase;
8189 start = start + delta;
8190 in = in + delta;
8191 }
8192 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008193 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008194 if (normalize) {
8195 /*
8196 * Skip any leading spaces
8197 */
8198 while ((in < end) && (*in != limit) &&
8199 ((*in == 0x20) || (*in == 0x9) ||
8200 (*in == 0xA) || (*in == 0xD))) {
8201 in++;
8202 start = in;
8203 if (in >= end) {
8204 const xmlChar *oldbase = ctxt->input->base;
8205 GROW;
8206 if (oldbase != ctxt->input->base) {
8207 long delta = ctxt->input->base - oldbase;
8208 start = start + delta;
8209 in = in + delta;
8210 }
8211 end = ctxt->input->end;
8212 }
8213 }
8214 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8215 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8216 if ((*in++ == 0x20) && (*in == 0x20)) break;
8217 if (in >= end) {
8218 const xmlChar *oldbase = ctxt->input->base;
8219 GROW;
8220 if (oldbase != ctxt->input->base) {
8221 long delta = ctxt->input->base - oldbase;
8222 start = start + delta;
8223 in = in + delta;
8224 }
8225 end = ctxt->input->end;
8226 }
8227 }
8228 last = in;
8229 /*
8230 * skip the trailing blanks
8231 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008232 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008233 while ((in < end) && (*in != limit) &&
8234 ((*in == 0x20) || (*in == 0x9) ||
8235 (*in == 0xA) || (*in == 0xD))) {
8236 in++;
8237 if (in >= end) {
8238 const xmlChar *oldbase = ctxt->input->base;
8239 GROW;
8240 if (oldbase != ctxt->input->base) {
8241 long delta = ctxt->input->base - oldbase;
8242 start = start + delta;
8243 in = in + delta;
8244 last = last + delta;
8245 }
8246 end = ctxt->input->end;
8247 }
8248 }
8249 if (*in != limit) goto need_complex;
8250 } else {
8251 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8252 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8253 in++;
8254 if (in >= end) {
8255 const xmlChar *oldbase = ctxt->input->base;
8256 GROW;
8257 if (oldbase != ctxt->input->base) {
8258 long delta = ctxt->input->base - oldbase;
8259 start = start + delta;
8260 in = in + delta;
8261 }
8262 end = ctxt->input->end;
8263 }
8264 }
8265 last = in;
8266 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008267 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008268 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008269 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008270 *len = last - start;
8271 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008272 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008273 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008274 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008275 }
8276 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008277 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008279need_complex:
8280 if (alloc) *alloc = 1;
8281 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008282}
8283
8284/**
8285 * xmlParseAttribute2:
8286 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008287 * @pref: the element prefix
8288 * @elem: the element name
8289 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008291 * @len: an int * to save the length of the attribute
8292 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008293 *
8294 * parse an attribute in the new SAX2 framework.
8295 *
8296 * Returns the attribute name, and the value in *value, .
8297 */
8298
8299static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008300xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008301 const xmlChar * pref, const xmlChar * elem,
8302 const xmlChar ** prefix, xmlChar ** value,
8303 int *len, int *alloc)
8304{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008305 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008306 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008307 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008308
8309 *value = NULL;
8310 GROW;
8311 name = xmlParseQName(ctxt, prefix);
8312 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008313 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8314 "error parsing attribute name\n");
8315 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008316 }
8317
8318 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008319 * get the type if needed
8320 */
8321 if (ctxt->attsSpecial != NULL) {
8322 int type;
8323
8324 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008325 pref, elem, *prefix, name);
8326 if (type != 0)
8327 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008328 }
8329
8330 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008331 * read the value
8332 */
8333 SKIP_BLANKS;
8334 if (RAW == '=') {
8335 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008336 SKIP_BLANKS;
8337 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8338 if (normalize) {
8339 /*
8340 * Sometimes a second normalisation pass for spaces is needed
8341 * but that only happens if charrefs or entities refernces
8342 * have been used in the attribute value, i.e. the attribute
8343 * value have been extracted in an allocated string already.
8344 */
8345 if (*alloc) {
8346 const xmlChar *val2;
8347
8348 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008349 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008350 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008351 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008352 }
8353 }
8354 }
8355 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008356 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008357 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8358 "Specification mandate value for attribute %s\n",
8359 name);
8360 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008361 }
8362
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008363 if (*prefix == ctxt->str_xml) {
8364 /*
8365 * Check that xml:lang conforms to the specification
8366 * No more registered as an error, just generate a warning now
8367 * since this was deprecated in XML second edition
8368 */
8369 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8370 internal_val = xmlStrndup(val, *len);
8371 if (!xmlCheckLanguageID(internal_val)) {
8372 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8373 "Malformed value for xml:lang : %s\n",
8374 internal_val, NULL);
8375 }
8376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008377
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008378 /*
8379 * Check that xml:space conforms to the specification
8380 */
8381 if (xmlStrEqual(name, BAD_CAST "space")) {
8382 internal_val = xmlStrndup(val, *len);
8383 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8384 *(ctxt->space) = 0;
8385 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8386 *(ctxt->space) = 1;
8387 else {
8388 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8389 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8390 internal_val, NULL);
8391 }
8392 }
8393 if (internal_val) {
8394 xmlFree(internal_val);
8395 }
8396 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008397
8398 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008399 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008400}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008401/**
8402 * xmlParseStartTag2:
8403 * @ctxt: an XML parser context
8404 *
8405 * parse a start of tag either for rule element or
8406 * EmptyElement. In both case we don't parse the tag closing chars.
8407 * This routine is called when running SAX2 parsing
8408 *
8409 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8410 *
8411 * [ WFC: Unique Att Spec ]
8412 * No attribute name may appear more than once in the same start-tag or
8413 * empty-element tag.
8414 *
8415 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8416 *
8417 * [ WFC: Unique Att Spec ]
8418 * No attribute name may appear more than once in the same start-tag or
8419 * empty-element tag.
8420 *
8421 * With namespace:
8422 *
8423 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8424 *
8425 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8426 *
8427 * Returns the element name parsed
8428 */
8429
8430static const xmlChar *
8431xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008432 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008433 const xmlChar *localname;
8434 const xmlChar *prefix;
8435 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008436 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008437 const xmlChar *nsname;
8438 xmlChar *attvalue;
8439 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008440 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008441 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008442 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008443 const xmlChar *base;
8444 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008445 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008446
8447 if (RAW != '<') return(NULL);
8448 NEXT1;
8449
8450 /*
8451 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8452 * point since the attribute values may be stored as pointers to
8453 * the buffer and calling SHRINK would destroy them !
8454 * The Shrinking is only possible once the full set of attribute
8455 * callbacks have been done.
8456 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008457reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008458 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008459 base = ctxt->input->base;
8460 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008461 oldline = ctxt->input->line;
8462 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008463 nbatts = 0;
8464 nratts = 0;
8465 nbdef = 0;
8466 nbNs = 0;
8467 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008468 /* Forget any namespaces added during an earlier parse of this element. */
8469 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008470
8471 localname = xmlParseQName(ctxt, &prefix);
8472 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008473 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8474 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008475 return(NULL);
8476 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008477 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008478
8479 /*
8480 * Now parse the attributes, it ends up with the ending
8481 *
8482 * (S Attribute)* S?
8483 */
8484 SKIP_BLANKS;
8485 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008486 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008487
8488 while ((RAW != '>') &&
8489 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008490 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008491 const xmlChar *q = CUR_PTR;
8492 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008493 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008495 attname = xmlParseAttribute2(ctxt, prefix, localname,
8496 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008497 if (ctxt->input->base != base) {
8498 if ((attvalue != NULL) && (alloc != 0))
8499 xmlFree(attvalue);
8500 attvalue = NULL;
8501 goto base_changed;
8502 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008503 if ((attname != NULL) && (attvalue != NULL)) {
8504 if (len < 0) len = xmlStrlen(attvalue);
8505 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008506 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8507 xmlURIPtr uri;
8508
8509 if (*URL != 0) {
8510 uri = xmlParseURI((const char *) URL);
8511 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008512 xmlNsErr(ctxt, XML_WAR_NS_URI,
8513 "xmlns: '%s' is not a valid URI\n",
8514 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008515 } else {
Daniel Veillard37334572008-07-31 08:20:02 +00008516 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8517 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8518 "xmlns: URI %s is not absolute\n",
8519 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008520 }
8521 xmlFreeURI(uri);
8522 }
Daniel Veillard37334572008-07-31 08:20:02 +00008523 if (URL == ctxt->str_xml_ns) {
8524 if (attname != ctxt->str_xml) {
8525 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8526 "xml namespace URI cannot be the default namespace\n",
8527 NULL, NULL, NULL);
8528 }
8529 goto skip_default_ns;
8530 }
8531 if ((len == 29) &&
8532 (xmlStrEqual(URL,
8533 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8534 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8535 "reuse of the xmlns namespace name is forbidden\n",
8536 NULL, NULL, NULL);
8537 goto skip_default_ns;
8538 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008539 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008540 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008541 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008542 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008543 for (j = 1;j <= nbNs;j++)
8544 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8545 break;
8546 if (j <= nbNs)
8547 xmlErrAttributeDup(ctxt, NULL, attname);
8548 else
8549 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008550skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008551 if (alloc != 0) xmlFree(attvalue);
8552 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008553 continue;
8554 }
8555 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008556 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8557 xmlURIPtr uri;
8558
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008559 if (attname == ctxt->str_xml) {
8560 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008561 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8562 "xml namespace prefix mapped to wrong URI\n",
8563 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008564 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008565 /*
8566 * Do not keep a namespace definition node
8567 */
Daniel Veillard37334572008-07-31 08:20:02 +00008568 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008569 }
Daniel Veillard37334572008-07-31 08:20:02 +00008570 if (URL == ctxt->str_xml_ns) {
8571 if (attname != ctxt->str_xml) {
8572 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8573 "xml namespace URI mapped to wrong prefix\n",
8574 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008575 }
Daniel Veillard37334572008-07-31 08:20:02 +00008576 goto skip_ns;
8577 }
8578 if (attname == ctxt->str_xmlns) {
8579 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8580 "redefinition of the xmlns prefix is forbidden\n",
8581 NULL, NULL, NULL);
8582 goto skip_ns;
8583 }
8584 if ((len == 29) &&
8585 (xmlStrEqual(URL,
8586 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8587 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8588 "reuse of the xmlns namespace name is forbidden\n",
8589 NULL, NULL, NULL);
8590 goto skip_ns;
8591 }
8592 if ((URL == NULL) || (URL[0] == 0)) {
8593 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8594 "xmlns:%s: Empty XML namespace is not allowed\n",
8595 attname, NULL, NULL);
8596 goto skip_ns;
8597 } else {
8598 uri = xmlParseURI((const char *) URL);
8599 if (uri == NULL) {
8600 xmlNsErr(ctxt, XML_WAR_NS_URI,
8601 "xmlns:%s: '%s' is not a valid URI\n",
8602 attname, URL, NULL);
8603 } else {
8604 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8605 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8606 "xmlns:%s: URI %s is not absolute\n",
8607 attname, URL, NULL);
8608 }
8609 xmlFreeURI(uri);
8610 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008611 }
8612
Daniel Veillard0fb18932003-09-07 09:14:37 +00008613 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008614 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008615 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008616 for (j = 1;j <= nbNs;j++)
8617 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8618 break;
8619 if (j <= nbNs)
8620 xmlErrAttributeDup(ctxt, aprefix, attname);
8621 else
8622 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008623skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008624 if (alloc != 0) xmlFree(attvalue);
8625 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008626 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008627 continue;
8628 }
8629
8630 /*
8631 * Add the pair to atts
8632 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008633 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8634 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 if (attvalue[len] == 0)
8636 xmlFree(attvalue);
8637 goto failed;
8638 }
8639 maxatts = ctxt->maxatts;
8640 atts = ctxt->atts;
8641 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008642 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008643 atts[nbatts++] = attname;
8644 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008645 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008646 atts[nbatts++] = attvalue;
8647 attvalue += len;
8648 atts[nbatts++] = attvalue;
8649 /*
8650 * tag if some deallocation is needed
8651 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008652 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008653 } else {
8654 if ((attvalue != NULL) && (attvalue[len] == 0))
8655 xmlFree(attvalue);
8656 }
8657
Daniel Veillard37334572008-07-31 08:20:02 +00008658failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008659
8660 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008661 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008662 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8663 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008664 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8666 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008667 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008668 }
8669 SKIP_BLANKS;
8670 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8671 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008672 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 break;
8675 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008676 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008677 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008678 }
8679
Daniel Veillard0fb18932003-09-07 09:14:37 +00008680 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008681 * The attributes defaulting
8682 */
8683 if (ctxt->attsDefault != NULL) {
8684 xmlDefAttrsPtr defaults;
8685
8686 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8687 if (defaults != NULL) {
8688 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008689 attname = defaults->values[5 * i];
8690 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008691
8692 /*
8693 * special work for namespaces defaulted defs
8694 */
8695 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8696 /*
8697 * check that it's not a defined namespace
8698 */
8699 for (j = 1;j <= nbNs;j++)
8700 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8701 break;
8702 if (j <= nbNs) continue;
8703
8704 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008705 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008706 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008707 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 nbNs++;
8709 }
8710 } else if (aprefix == ctxt->str_xmlns) {
8711 /*
8712 * check that it's not a defined namespace
8713 */
8714 for (j = 1;j <= nbNs;j++)
8715 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8716 break;
8717 if (j <= nbNs) continue;
8718
8719 nsname = xmlGetNamespace(ctxt, attname);
8720 if (nsname != defaults->values[2]) {
8721 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008722 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008723 nbNs++;
8724 }
8725 } else {
8726 /*
8727 * check that it's not a defined attribute
8728 */
8729 for (j = 0;j < nbatts;j+=5) {
8730 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8731 break;
8732 }
8733 if (j < nbatts) continue;
8734
8735 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8736 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008737 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008738 }
8739 maxatts = ctxt->maxatts;
8740 atts = ctxt->atts;
8741 }
8742 atts[nbatts++] = attname;
8743 atts[nbatts++] = aprefix;
8744 if (aprefix == NULL)
8745 atts[nbatts++] = NULL;
8746 else
8747 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008748 atts[nbatts++] = defaults->values[5 * i + 2];
8749 atts[nbatts++] = defaults->values[5 * i + 3];
8750 if ((ctxt->standalone == 1) &&
8751 (defaults->values[5 * i + 4] != NULL)) {
8752 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8753 "standalone: attribute %s on %s defaulted from external subset\n",
8754 attname, localname);
8755 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008756 nbdef++;
8757 }
8758 }
8759 }
8760 }
8761
Daniel Veillarde70c8772003-11-25 07:21:18 +00008762 /*
8763 * The attributes checkings
8764 */
8765 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008766 /*
8767 * The default namespace does not apply to attribute names.
8768 */
8769 if (atts[i + 1] != NULL) {
8770 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8771 if (nsname == NULL) {
8772 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8773 "Namespace prefix %s for %s on %s is not defined\n",
8774 atts[i + 1], atts[i], localname);
8775 }
8776 atts[i + 2] = nsname;
8777 } else
8778 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008779 /*
8780 * [ WFC: Unique Att Spec ]
8781 * No attribute name may appear more than once in the same
8782 * start-tag or empty-element tag.
8783 * As extended by the Namespace in XML REC.
8784 */
8785 for (j = 0; j < i;j += 5) {
8786 if (atts[i] == atts[j]) {
8787 if (atts[i+1] == atts[j+1]) {
8788 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8789 break;
8790 }
8791 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8792 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8793 "Namespaced Attribute %s in '%s' redefined\n",
8794 atts[i], nsname, NULL);
8795 break;
8796 }
8797 }
8798 }
8799 }
8800
Daniel Veillarde57ec792003-09-10 10:50:59 +00008801 nsname = xmlGetNamespace(ctxt, prefix);
8802 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008803 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8804 "Namespace prefix %s on %s is not defined\n",
8805 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008806 }
8807 *pref = prefix;
8808 *URI = nsname;
8809
8810 /*
8811 * SAX: Start of Element !
8812 */
8813 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8814 (!ctxt->disableSAX)) {
8815 if (nbNs > 0)
8816 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8817 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8818 nbatts / 5, nbdef, atts);
8819 else
8820 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8821 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8822 }
8823
8824 /*
8825 * Free up attribute allocated strings if needed
8826 */
8827 if (attval != 0) {
8828 for (i = 3,j = 0; j < nratts;i += 5,j++)
8829 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8830 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008831 }
8832
8833 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008834
8835base_changed:
8836 /*
8837 * the attribute strings are valid iif the base didn't changed
8838 */
8839 if (attval != 0) {
8840 for (i = 3,j = 0; j < nratts;i += 5,j++)
8841 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8842 xmlFree((xmlChar *) atts[i]);
8843 }
8844 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008845 ctxt->input->line = oldline;
8846 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008847 if (ctxt->wellFormed == 1) {
8848 goto reparse;
8849 }
8850 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851}
8852
8853/**
8854 * xmlParseEndTag2:
8855 * @ctxt: an XML parser context
8856 * @line: line of the start tag
8857 * @nsNr: number of namespaces on the start tag
8858 *
8859 * parse an end of tag
8860 *
8861 * [42] ETag ::= '</' Name S? '>'
8862 *
8863 * With namespace
8864 *
8865 * [NS 9] ETag ::= '</' QName S? '>'
8866 */
8867
8868static void
8869xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008870 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008871 const xmlChar *name;
8872
8873 GROW;
8874 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008875 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008876 return;
8877 }
8878 SKIP(2);
8879
William M. Brack13dfa872004-09-18 04:52:08 +00008880 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008881 if (ctxt->input->cur[tlen] == '>') {
8882 ctxt->input->cur += tlen + 1;
8883 goto done;
8884 }
8885 ctxt->input->cur += tlen;
8886 name = (xmlChar*)1;
8887 } else {
8888 if (prefix == NULL)
8889 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8890 else
8891 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8892 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893
8894 /*
8895 * We should definitely be at the ending "S? '>'" part
8896 */
8897 GROW;
8898 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008899 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008900 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008901 } else
8902 NEXT1;
8903
8904 /*
8905 * [ WFC: Element Type Match ]
8906 * The Name in an element's end-tag must match the element type in the
8907 * start-tag.
8908 *
8909 */
8910 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008911 if (name == NULL) name = BAD_CAST "unparseable";
8912 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008913 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008914 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008915 }
8916
8917 /*
8918 * SAX: End of Tag
8919 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008920done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008921 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8922 (!ctxt->disableSAX))
8923 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8924
Daniel Veillard0fb18932003-09-07 09:14:37 +00008925 spacePop(ctxt);
8926 if (nsNr != 0)
8927 nsPop(ctxt, nsNr);
8928 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008929}
8930
8931/**
Owen Taylor3473f882001-02-23 17:55:21 +00008932 * xmlParseCDSect:
8933 * @ctxt: an XML parser context
8934 *
8935 * Parse escaped pure raw content.
8936 *
8937 * [18] CDSect ::= CDStart CData CDEnd
8938 *
8939 * [19] CDStart ::= '<![CDATA['
8940 *
8941 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8942 *
8943 * [21] CDEnd ::= ']]>'
8944 */
8945void
8946xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8947 xmlChar *buf = NULL;
8948 int len = 0;
8949 int size = XML_PARSER_BUFFER_SIZE;
8950 int r, rl;
8951 int s, sl;
8952 int cur, l;
8953 int count = 0;
8954
Daniel Veillard8f597c32003-10-06 08:19:27 +00008955 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008956 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008957 SKIP(9);
8958 } else
8959 return;
8960
8961 ctxt->instate = XML_PARSER_CDATA_SECTION;
8962 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008963 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008964 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008965 ctxt->instate = XML_PARSER_CONTENT;
8966 return;
8967 }
8968 NEXTL(rl);
8969 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008970 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008971 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008972 ctxt->instate = XML_PARSER_CONTENT;
8973 return;
8974 }
8975 NEXTL(sl);
8976 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008977 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008978 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008979 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008980 return;
8981 }
William M. Brack871611b2003-10-18 04:53:14 +00008982 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008983 ((r != ']') || (s != ']') || (cur != '>'))) {
8984 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008985 xmlChar *tmp;
8986
Owen Taylor3473f882001-02-23 17:55:21 +00008987 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008988 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8989 if (tmp == NULL) {
8990 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008991 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008992 return;
8993 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008994 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008995 }
8996 COPY_BUF(rl,buf,len,r);
8997 r = s;
8998 rl = sl;
8999 s = cur;
9000 sl = l;
9001 count++;
9002 if (count > 50) {
9003 GROW;
9004 count = 0;
9005 }
9006 NEXTL(l);
9007 cur = CUR_CHAR(l);
9008 }
9009 buf[len] = 0;
9010 ctxt->instate = XML_PARSER_CONTENT;
9011 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009012 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009013 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009014 xmlFree(buf);
9015 return;
9016 }
9017 NEXTL(l);
9018
9019 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009020 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009021 */
9022 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9023 if (ctxt->sax->cdataBlock != NULL)
9024 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009025 else if (ctxt->sax->characters != NULL)
9026 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009027 }
9028 xmlFree(buf);
9029}
9030
9031/**
9032 * xmlParseContent:
9033 * @ctxt: an XML parser context
9034 *
9035 * Parse a content:
9036 *
9037 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9038 */
9039
9040void
9041xmlParseContent(xmlParserCtxtPtr ctxt) {
9042 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009043 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009044 ((RAW != '<') || (NXT(1) != '/')) &&
9045 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009046 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009047 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009048 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009049
9050 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009051 * First case : a Processing Instruction.
9052 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009053 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009054 xmlParsePI(ctxt);
9055 }
9056
9057 /*
9058 * Second case : a CDSection
9059 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009060 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009061 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009062 xmlParseCDSect(ctxt);
9063 }
9064
9065 /*
9066 * Third case : a comment
9067 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009068 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009069 (NXT(2) == '-') && (NXT(3) == '-')) {
9070 xmlParseComment(ctxt);
9071 ctxt->instate = XML_PARSER_CONTENT;
9072 }
9073
9074 /*
9075 * Fourth case : a sub-element.
9076 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009077 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009078 xmlParseElement(ctxt);
9079 }
9080
9081 /*
9082 * Fifth case : a reference. If if has not been resolved,
9083 * parsing returns it's Name, create the node
9084 */
9085
Daniel Veillard21a0f912001-02-25 19:54:14 +00009086 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009087 xmlParseReference(ctxt);
9088 }
9089
9090 /*
9091 * Last case, text. Note that References are handled directly.
9092 */
9093 else {
9094 xmlParseCharData(ctxt, 0);
9095 }
9096
9097 GROW;
9098 /*
9099 * Pop-up of finished entities.
9100 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009101 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009102 xmlPopInput(ctxt);
9103 SHRINK;
9104
Daniel Veillardfdc91562002-07-01 21:52:03 +00009105 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009106 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9107 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009108 ctxt->instate = XML_PARSER_EOF;
9109 break;
9110 }
9111 }
9112}
9113
9114/**
9115 * xmlParseElement:
9116 * @ctxt: an XML parser context
9117 *
9118 * parse an XML element, this is highly recursive
9119 *
9120 * [39] element ::= EmptyElemTag | STag content ETag
9121 *
9122 * [ WFC: Element Type Match ]
9123 * The Name in an element's end-tag must match the element type in the
9124 * start-tag.
9125 *
Owen Taylor3473f882001-02-23 17:55:21 +00009126 */
9127
9128void
9129xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009130 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009131 const xmlChar *prefix;
9132 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009133 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009134 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009135 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009137
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009138 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
9139 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9140 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
9141 xmlParserMaxDepth);
9142 ctxt->instate = XML_PARSER_EOF;
9143 return;
9144 }
9145
Owen Taylor3473f882001-02-23 17:55:21 +00009146 /* Capture start position */
9147 if (ctxt->record_info) {
9148 node_info.begin_pos = ctxt->input->consumed +
9149 (CUR_PTR - ctxt->input->base);
9150 node_info.begin_line = ctxt->input->line;
9151 }
9152
9153 if (ctxt->spaceNr == 0)
9154 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009155 else if (*ctxt->space == -2)
9156 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009157 else
9158 spacePush(ctxt, *ctxt->space);
9159
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009160 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009161#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009162 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009163#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009164 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009165#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009166 else
9167 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009168#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009169 if (name == NULL) {
9170 spacePop(ctxt);
9171 return;
9172 }
9173 namePush(ctxt, name);
9174 ret = ctxt->node;
9175
Daniel Veillard4432df22003-09-28 18:58:27 +00009176#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009177 /*
9178 * [ VC: Root Element Type ]
9179 * The Name in the document type declaration must match the element
9180 * type of the root element.
9181 */
9182 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9183 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9184 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009185#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009186
9187 /*
9188 * Check for an Empty Element.
9189 */
9190 if ((RAW == '/') && (NXT(1) == '>')) {
9191 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192 if (ctxt->sax2) {
9193 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9194 (!ctxt->disableSAX))
9195 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009196#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009197 } else {
9198 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9199 (!ctxt->disableSAX))
9200 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009201#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009202 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009203 namePop(ctxt);
9204 spacePop(ctxt);
9205 if (nsNr != ctxt->nsNr)
9206 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009207 if ( ret != NULL && ctxt->record_info ) {
9208 node_info.end_pos = ctxt->input->consumed +
9209 (CUR_PTR - ctxt->input->base);
9210 node_info.end_line = ctxt->input->line;
9211 node_info.node = ret;
9212 xmlParserAddNodeInfo(ctxt, &node_info);
9213 }
9214 return;
9215 }
9216 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009217 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009218 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009219 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9220 "Couldn't find end of Start Tag %s line %d\n",
9221 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009222
9223 /*
9224 * end of parsing of this node.
9225 */
9226 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009227 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009228 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009229 if (nsNr != ctxt->nsNr)
9230 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009231
9232 /*
9233 * Capture end position and add node
9234 */
9235 if ( ret != NULL && ctxt->record_info ) {
9236 node_info.end_pos = ctxt->input->consumed +
9237 (CUR_PTR - ctxt->input->base);
9238 node_info.end_line = ctxt->input->line;
9239 node_info.node = ret;
9240 xmlParserAddNodeInfo(ctxt, &node_info);
9241 }
9242 return;
9243 }
9244
9245 /*
9246 * Parse the content of the element:
9247 */
9248 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009249 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009250 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009251 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009252 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009253
9254 /*
9255 * end of parsing of this node.
9256 */
9257 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009258 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009259 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009260 if (nsNr != ctxt->nsNr)
9261 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009262 return;
9263 }
9264
9265 /*
9266 * parse the end of tag: '</' should be here.
9267 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009268 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009269 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009270 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009271 }
9272#ifdef LIBXML_SAX1_ENABLED
9273 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009274 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009275#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009276
9277 /*
9278 * Capture end position and add node
9279 */
9280 if ( ret != NULL && ctxt->record_info ) {
9281 node_info.end_pos = ctxt->input->consumed +
9282 (CUR_PTR - ctxt->input->base);
9283 node_info.end_line = ctxt->input->line;
9284 node_info.node = ret;
9285 xmlParserAddNodeInfo(ctxt, &node_info);
9286 }
9287}
9288
9289/**
9290 * xmlParseVersionNum:
9291 * @ctxt: an XML parser context
9292 *
9293 * parse the XML version value.
9294 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009295 * [26] VersionNum ::= '1.' [0-9]+
9296 *
9297 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009298 *
9299 * Returns the string giving the XML version number, or NULL
9300 */
9301xmlChar *
9302xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9303 xmlChar *buf = NULL;
9304 int len = 0;
9305 int size = 10;
9306 xmlChar cur;
9307
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009308 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009309 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009310 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009311 return(NULL);
9312 }
9313 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009314 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009315 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009316 return(NULL);
9317 }
9318 buf[len++] = cur;
9319 NEXT;
9320 cur=CUR;
9321 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009322 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009323 return(NULL);
9324 }
9325 buf[len++] = cur;
9326 NEXT;
9327 cur=CUR;
9328 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009329 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009330 xmlChar *tmp;
9331
Owen Taylor3473f882001-02-23 17:55:21 +00009332 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009333 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9334 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009335 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009336 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009337 return(NULL);
9338 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009339 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009340 }
9341 buf[len++] = cur;
9342 NEXT;
9343 cur=CUR;
9344 }
9345 buf[len] = 0;
9346 return(buf);
9347}
9348
9349/**
9350 * xmlParseVersionInfo:
9351 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009352 *
Owen Taylor3473f882001-02-23 17:55:21 +00009353 * parse the XML version.
9354 *
9355 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009356 *
Owen Taylor3473f882001-02-23 17:55:21 +00009357 * [25] Eq ::= S? '=' S?
9358 *
9359 * Returns the version string, e.g. "1.0"
9360 */
9361
9362xmlChar *
9363xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9364 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009365
Daniel Veillarda07050d2003-10-19 14:46:32 +00009366 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009367 SKIP(7);
9368 SKIP_BLANKS;
9369 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009370 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009371 return(NULL);
9372 }
9373 NEXT;
9374 SKIP_BLANKS;
9375 if (RAW == '"') {
9376 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009377 version = xmlParseVersionNum(ctxt);
9378 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009379 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009380 } else
9381 NEXT;
9382 } else if (RAW == '\''){
9383 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009384 version = xmlParseVersionNum(ctxt);
9385 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009386 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009387 } else
9388 NEXT;
9389 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009390 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009391 }
9392 }
9393 return(version);
9394}
9395
9396/**
9397 * xmlParseEncName:
9398 * @ctxt: an XML parser context
9399 *
9400 * parse the XML encoding name
9401 *
9402 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9403 *
9404 * Returns the encoding name value or NULL
9405 */
9406xmlChar *
9407xmlParseEncName(xmlParserCtxtPtr ctxt) {
9408 xmlChar *buf = NULL;
9409 int len = 0;
9410 int size = 10;
9411 xmlChar cur;
9412
9413 cur = CUR;
9414 if (((cur >= 'a') && (cur <= 'z')) ||
9415 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009416 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009417 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009418 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009419 return(NULL);
9420 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009421
Owen Taylor3473f882001-02-23 17:55:21 +00009422 buf[len++] = cur;
9423 NEXT;
9424 cur = CUR;
9425 while (((cur >= 'a') && (cur <= 'z')) ||
9426 ((cur >= 'A') && (cur <= 'Z')) ||
9427 ((cur >= '0') && (cur <= '9')) ||
9428 (cur == '.') || (cur == '_') ||
9429 (cur == '-')) {
9430 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009431 xmlChar *tmp;
9432
Owen Taylor3473f882001-02-23 17:55:21 +00009433 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009434 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9435 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009436 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009437 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009438 return(NULL);
9439 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009440 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009441 }
9442 buf[len++] = cur;
9443 NEXT;
9444 cur = CUR;
9445 if (cur == 0) {
9446 SHRINK;
9447 GROW;
9448 cur = CUR;
9449 }
9450 }
9451 buf[len] = 0;
9452 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009453 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009454 }
9455 return(buf);
9456}
9457
9458/**
9459 * xmlParseEncodingDecl:
9460 * @ctxt: an XML parser context
9461 *
9462 * parse the XML encoding declaration
9463 *
9464 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9465 *
9466 * this setups the conversion filters.
9467 *
9468 * Returns the encoding value or NULL
9469 */
9470
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009471const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009472xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9473 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009474
9475 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009476 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009477 SKIP(8);
9478 SKIP_BLANKS;
9479 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009480 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009481 return(NULL);
9482 }
9483 NEXT;
9484 SKIP_BLANKS;
9485 if (RAW == '"') {
9486 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009487 encoding = xmlParseEncName(ctxt);
9488 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009489 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009490 } else
9491 NEXT;
9492 } else if (RAW == '\''){
9493 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009494 encoding = xmlParseEncName(ctxt);
9495 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009496 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009497 } else
9498 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009499 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009500 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009501 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009502 /*
9503 * UTF-16 encoding stwich has already taken place at this stage,
9504 * more over the little-endian/big-endian selection is already done
9505 */
9506 if ((encoding != NULL) &&
9507 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9508 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009509 /*
9510 * If no encoding was passed to the parser, that we are
9511 * using UTF-16 and no decoder is present i.e. the
9512 * document is apparently UTF-8 compatible, then raise an
9513 * encoding mismatch fatal error
9514 */
9515 if ((ctxt->encoding == NULL) &&
9516 (ctxt->input->buf != NULL) &&
9517 (ctxt->input->buf->encoder == NULL)) {
9518 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9519 "Document labelled UTF-16 but has UTF-8 content\n");
9520 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009521 if (ctxt->encoding != NULL)
9522 xmlFree((xmlChar *) ctxt->encoding);
9523 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009524 }
9525 /*
9526 * UTF-8 encoding is handled natively
9527 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009528 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009529 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9530 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009531 if (ctxt->encoding != NULL)
9532 xmlFree((xmlChar *) ctxt->encoding);
9533 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009534 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009535 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009536 xmlCharEncodingHandlerPtr handler;
9537
9538 if (ctxt->input->encoding != NULL)
9539 xmlFree((xmlChar *) ctxt->input->encoding);
9540 ctxt->input->encoding = encoding;
9541
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009542 handler = xmlFindCharEncodingHandler((const char *) encoding);
9543 if (handler != NULL) {
9544 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009545 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009546 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009547 "Unsupported encoding %s\n", encoding);
9548 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009549 }
9550 }
9551 }
9552 return(encoding);
9553}
9554
9555/**
9556 * xmlParseSDDecl:
9557 * @ctxt: an XML parser context
9558 *
9559 * parse the XML standalone declaration
9560 *
9561 * [32] SDDecl ::= S 'standalone' Eq
9562 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9563 *
9564 * [ VC: Standalone Document Declaration ]
9565 * TODO The standalone document declaration must have the value "no"
9566 * if any external markup declarations contain declarations of:
9567 * - attributes with default values, if elements to which these
9568 * attributes apply appear in the document without specifications
9569 * of values for these attributes, or
9570 * - entities (other than amp, lt, gt, apos, quot), if references
9571 * to those entities appear in the document, or
9572 * - attributes with values subject to normalization, where the
9573 * attribute appears in the document with a value which will change
9574 * as a result of normalization, or
9575 * - element types with element content, if white space occurs directly
9576 * within any instance of those types.
9577 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009578 * Returns:
9579 * 1 if standalone="yes"
9580 * 0 if standalone="no"
9581 * -2 if standalone attribute is missing or invalid
9582 * (A standalone value of -2 means that the XML declaration was found,
9583 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009584 */
9585
9586int
9587xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009588 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009589
9590 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009591 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009592 SKIP(10);
9593 SKIP_BLANKS;
9594 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009595 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009596 return(standalone);
9597 }
9598 NEXT;
9599 SKIP_BLANKS;
9600 if (RAW == '\''){
9601 NEXT;
9602 if ((RAW == 'n') && (NXT(1) == 'o')) {
9603 standalone = 0;
9604 SKIP(2);
9605 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9606 (NXT(2) == 's')) {
9607 standalone = 1;
9608 SKIP(3);
9609 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009610 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009611 }
9612 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009613 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009614 } else
9615 NEXT;
9616 } else if (RAW == '"'){
9617 NEXT;
9618 if ((RAW == 'n') && (NXT(1) == 'o')) {
9619 standalone = 0;
9620 SKIP(2);
9621 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9622 (NXT(2) == 's')) {
9623 standalone = 1;
9624 SKIP(3);
9625 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009626 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009627 }
9628 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009629 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009630 } else
9631 NEXT;
9632 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009633 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009634 }
9635 }
9636 return(standalone);
9637}
9638
9639/**
9640 * xmlParseXMLDecl:
9641 * @ctxt: an XML parser context
9642 *
9643 * parse an XML declaration header
9644 *
9645 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9646 */
9647
9648void
9649xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9650 xmlChar *version;
9651
9652 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009653 * This value for standalone indicates that the document has an
9654 * XML declaration but it does not have a standalone attribute.
9655 * It will be overwritten later if a standalone attribute is found.
9656 */
9657 ctxt->input->standalone = -2;
9658
9659 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009660 * We know that '<?xml' is here.
9661 */
9662 SKIP(5);
9663
William M. Brack76e95df2003-10-18 16:20:14 +00009664 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9666 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009667 }
9668 SKIP_BLANKS;
9669
9670 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009671 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009672 */
9673 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009674 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009675 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009676 } else {
9677 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9678 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009679 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009680 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009681 if (ctxt->options & XML_PARSE_OLD10) {
9682 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9683 "Unsupported version '%s'\n",
9684 version);
9685 } else {
9686 if ((version[0] == '1') && ((version[1] == '.'))) {
9687 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9688 "Unsupported version '%s'\n",
9689 version, NULL);
9690 } else {
9691 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9692 "Unsupported version '%s'\n",
9693 version);
9694 }
9695 }
Daniel Veillard19840942001-11-29 16:11:38 +00009696 }
9697 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009698 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009699 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009700 }
Owen Taylor3473f882001-02-23 17:55:21 +00009701
9702 /*
9703 * We may have the encoding declaration
9704 */
William M. Brack76e95df2003-10-18 16:20:14 +00009705 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009706 if ((RAW == '?') && (NXT(1) == '>')) {
9707 SKIP(2);
9708 return;
9709 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009711 }
9712 xmlParseEncodingDecl(ctxt);
9713 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9714 /*
9715 * The XML REC instructs us to stop parsing right here
9716 */
9717 return;
9718 }
9719
9720 /*
9721 * We may have the standalone status.
9722 */
William M. Brack76e95df2003-10-18 16:20:14 +00009723 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009724 if ((RAW == '?') && (NXT(1) == '>')) {
9725 SKIP(2);
9726 return;
9727 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009728 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009729 }
9730 SKIP_BLANKS;
9731 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9732
9733 SKIP_BLANKS;
9734 if ((RAW == '?') && (NXT(1) == '>')) {
9735 SKIP(2);
9736 } else if (RAW == '>') {
9737 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009738 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009739 NEXT;
9740 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009741 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009742 MOVETO_ENDTAG(CUR_PTR);
9743 NEXT;
9744 }
9745}
9746
9747/**
9748 * xmlParseMisc:
9749 * @ctxt: an XML parser context
9750 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009751 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009752 *
9753 * [27] Misc ::= Comment | PI | S
9754 */
9755
9756void
9757xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009758 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009759 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009760 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009761 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009762 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009763 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009764 NEXT;
9765 } else
9766 xmlParseComment(ctxt);
9767 }
9768}
9769
9770/**
9771 * xmlParseDocument:
9772 * @ctxt: an XML parser context
9773 *
9774 * parse an XML document (and build a tree if using the standard SAX
9775 * interface).
9776 *
9777 * [1] document ::= prolog element Misc*
9778 *
9779 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9780 *
9781 * Returns 0, -1 in case of error. the parser context is augmented
9782 * as a result of the parsing.
9783 */
9784
9785int
9786xmlParseDocument(xmlParserCtxtPtr ctxt) {
9787 xmlChar start[4];
9788 xmlCharEncoding enc;
9789
9790 xmlInitParser();
9791
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009792 if ((ctxt == NULL) || (ctxt->input == NULL))
9793 return(-1);
9794
Owen Taylor3473f882001-02-23 17:55:21 +00009795 GROW;
9796
9797 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009798 * SAX: detecting the level.
9799 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009800 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009801
9802 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009803 * SAX: beginning of the document processing.
9804 */
9805 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9806 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9807
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009808 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9809 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009810 /*
9811 * Get the 4 first bytes and decode the charset
9812 * if enc != XML_CHAR_ENCODING_NONE
9813 * plug some encoding conversion routines.
9814 */
9815 start[0] = RAW;
9816 start[1] = NXT(1);
9817 start[2] = NXT(2);
9818 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009819 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009820 if (enc != XML_CHAR_ENCODING_NONE) {
9821 xmlSwitchEncoding(ctxt, enc);
9822 }
Owen Taylor3473f882001-02-23 17:55:21 +00009823 }
9824
9825
9826 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009827 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009828 }
9829
9830 /*
9831 * Check for the XMLDecl in the Prolog.
9832 */
9833 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009834 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009835
9836 /*
9837 * Note that we will switch encoding on the fly.
9838 */
9839 xmlParseXMLDecl(ctxt);
9840 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9841 /*
9842 * The XML REC instructs us to stop parsing right here
9843 */
9844 return(-1);
9845 }
9846 ctxt->standalone = ctxt->input->standalone;
9847 SKIP_BLANKS;
9848 } else {
9849 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9850 }
9851 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9852 ctxt->sax->startDocument(ctxt->userData);
9853
9854 /*
9855 * The Misc part of the Prolog
9856 */
9857 GROW;
9858 xmlParseMisc(ctxt);
9859
9860 /*
9861 * Then possibly doc type declaration(s) and more Misc
9862 * (doctypedecl Misc*)?
9863 */
9864 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009865 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009866
9867 ctxt->inSubset = 1;
9868 xmlParseDocTypeDecl(ctxt);
9869 if (RAW == '[') {
9870 ctxt->instate = XML_PARSER_DTD;
9871 xmlParseInternalSubset(ctxt);
9872 }
9873
9874 /*
9875 * Create and update the external subset.
9876 */
9877 ctxt->inSubset = 2;
9878 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9879 (!ctxt->disableSAX))
9880 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9881 ctxt->extSubSystem, ctxt->extSubURI);
9882 ctxt->inSubset = 0;
9883
Daniel Veillardac4118d2008-01-11 05:27:32 +00009884 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009885
9886 ctxt->instate = XML_PARSER_PROLOG;
9887 xmlParseMisc(ctxt);
9888 }
9889
9890 /*
9891 * Time to start parsing the tree itself
9892 */
9893 GROW;
9894 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009895 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9896 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009897 } else {
9898 ctxt->instate = XML_PARSER_CONTENT;
9899 xmlParseElement(ctxt);
9900 ctxt->instate = XML_PARSER_EPILOG;
9901
9902
9903 /*
9904 * The Misc part at the end
9905 */
9906 xmlParseMisc(ctxt);
9907
Daniel Veillard561b7f82002-03-20 21:55:57 +00009908 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009909 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009910 }
9911 ctxt->instate = XML_PARSER_EOF;
9912 }
9913
9914 /*
9915 * SAX: end of the document processing.
9916 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009917 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009918 ctxt->sax->endDocument(ctxt->userData);
9919
Daniel Veillard5997aca2002-03-18 18:36:20 +00009920 /*
9921 * Remove locally kept entity definitions if the tree was not built
9922 */
9923 if ((ctxt->myDoc != NULL) &&
9924 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9925 xmlFreeDoc(ctxt->myDoc);
9926 ctxt->myDoc = NULL;
9927 }
9928
Daniel Veillardae0765b2008-07-31 19:54:59 +00009929 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
9930 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
9931 if (ctxt->valid)
9932 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
9933 if (ctxt->nsWellFormed)
9934 ctxt->myDoc->properties |= XML_DOC_NSVALID;
9935 if (ctxt->options & XML_PARSE_OLD10)
9936 ctxt->myDoc->properties |= XML_DOC_OLD10;
9937 }
Daniel Veillardc7612992002-02-17 22:47:37 +00009938 if (! ctxt->wellFormed) {
9939 ctxt->valid = 0;
9940 return(-1);
9941 }
Owen Taylor3473f882001-02-23 17:55:21 +00009942 return(0);
9943}
9944
9945/**
9946 * xmlParseExtParsedEnt:
9947 * @ctxt: an XML parser context
9948 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009949 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009950 * An external general parsed entity is well-formed if it matches the
9951 * production labeled extParsedEnt.
9952 *
9953 * [78] extParsedEnt ::= TextDecl? content
9954 *
9955 * Returns 0, -1 in case of error. the parser context is augmented
9956 * as a result of the parsing.
9957 */
9958
9959int
9960xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9961 xmlChar start[4];
9962 xmlCharEncoding enc;
9963
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009964 if ((ctxt == NULL) || (ctxt->input == NULL))
9965 return(-1);
9966
Owen Taylor3473f882001-02-23 17:55:21 +00009967 xmlDefaultSAXHandlerInit();
9968
Daniel Veillard309f81d2003-09-23 09:02:53 +00009969 xmlDetectSAX2(ctxt);
9970
Owen Taylor3473f882001-02-23 17:55:21 +00009971 GROW;
9972
9973 /*
9974 * SAX: beginning of the document processing.
9975 */
9976 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9977 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9978
9979 /*
9980 * Get the 4 first bytes and decode the charset
9981 * if enc != XML_CHAR_ENCODING_NONE
9982 * plug some encoding conversion routines.
9983 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009984 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9985 start[0] = RAW;
9986 start[1] = NXT(1);
9987 start[2] = NXT(2);
9988 start[3] = NXT(3);
9989 enc = xmlDetectCharEncoding(start, 4);
9990 if (enc != XML_CHAR_ENCODING_NONE) {
9991 xmlSwitchEncoding(ctxt, enc);
9992 }
Owen Taylor3473f882001-02-23 17:55:21 +00009993 }
9994
9995
9996 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009997 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009998 }
9999
10000 /*
10001 * Check for the XMLDecl in the Prolog.
10002 */
10003 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010004 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010005
10006 /*
10007 * Note that we will switch encoding on the fly.
10008 */
10009 xmlParseXMLDecl(ctxt);
10010 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10011 /*
10012 * The XML REC instructs us to stop parsing right here
10013 */
10014 return(-1);
10015 }
10016 SKIP_BLANKS;
10017 } else {
10018 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10019 }
10020 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10021 ctxt->sax->startDocument(ctxt->userData);
10022
10023 /*
10024 * Doing validity checking on chunk doesn't make sense
10025 */
10026 ctxt->instate = XML_PARSER_CONTENT;
10027 ctxt->validate = 0;
10028 ctxt->loadsubset = 0;
10029 ctxt->depth = 0;
10030
10031 xmlParseContent(ctxt);
10032
10033 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010034 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010035 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010036 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010037 }
10038
10039 /*
10040 * SAX: end of the document processing.
10041 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010042 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010043 ctxt->sax->endDocument(ctxt->userData);
10044
10045 if (! ctxt->wellFormed) return(-1);
10046 return(0);
10047}
10048
Daniel Veillard73b013f2003-09-30 12:36:01 +000010049#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010050/************************************************************************
10051 * *
10052 * Progressive parsing interfaces *
10053 * *
10054 ************************************************************************/
10055
10056/**
10057 * xmlParseLookupSequence:
10058 * @ctxt: an XML parser context
10059 * @first: the first char to lookup
10060 * @next: the next char to lookup or zero
10061 * @third: the next char to lookup or zero
10062 *
10063 * Try to find if a sequence (first, next, third) or just (first next) or
10064 * (first) is available in the input stream.
10065 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10066 * to avoid rescanning sequences of bytes, it DOES change the state of the
10067 * parser, do not use liberally.
10068 *
10069 * Returns the index to the current parsing point if the full sequence
10070 * is available, -1 otherwise.
10071 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010072static int
Owen Taylor3473f882001-02-23 17:55:21 +000010073xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10074 xmlChar next, xmlChar third) {
10075 int base, len;
10076 xmlParserInputPtr in;
10077 const xmlChar *buf;
10078
10079 in = ctxt->input;
10080 if (in == NULL) return(-1);
10081 base = in->cur - in->base;
10082 if (base < 0) return(-1);
10083 if (ctxt->checkIndex > base)
10084 base = ctxt->checkIndex;
10085 if (in->buf == NULL) {
10086 buf = in->base;
10087 len = in->length;
10088 } else {
10089 buf = in->buf->buffer->content;
10090 len = in->buf->buffer->use;
10091 }
10092 /* take into account the sequence length */
10093 if (third) len -= 2;
10094 else if (next) len --;
10095 for (;base < len;base++) {
10096 if (buf[base] == first) {
10097 if (third != 0) {
10098 if ((buf[base + 1] != next) ||
10099 (buf[base + 2] != third)) continue;
10100 } else if (next != 0) {
10101 if (buf[base + 1] != next) continue;
10102 }
10103 ctxt->checkIndex = 0;
10104#ifdef DEBUG_PUSH
10105 if (next == 0)
10106 xmlGenericError(xmlGenericErrorContext,
10107 "PP: lookup '%c' found at %d\n",
10108 first, base);
10109 else if (third == 0)
10110 xmlGenericError(xmlGenericErrorContext,
10111 "PP: lookup '%c%c' found at %d\n",
10112 first, next, base);
10113 else
10114 xmlGenericError(xmlGenericErrorContext,
10115 "PP: lookup '%c%c%c' found at %d\n",
10116 first, next, third, base);
10117#endif
10118 return(base - (in->cur - in->base));
10119 }
10120 }
10121 ctxt->checkIndex = base;
10122#ifdef DEBUG_PUSH
10123 if (next == 0)
10124 xmlGenericError(xmlGenericErrorContext,
10125 "PP: lookup '%c' failed\n", first);
10126 else if (third == 0)
10127 xmlGenericError(xmlGenericErrorContext,
10128 "PP: lookup '%c%c' failed\n", first, next);
10129 else
10130 xmlGenericError(xmlGenericErrorContext,
10131 "PP: lookup '%c%c%c' failed\n", first, next, third);
10132#endif
10133 return(-1);
10134}
10135
10136/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010137 * xmlParseGetLasts:
10138 * @ctxt: an XML parser context
10139 * @lastlt: pointer to store the last '<' from the input
10140 * @lastgt: pointer to store the last '>' from the input
10141 *
10142 * Lookup the last < and > in the current chunk
10143 */
10144static void
10145xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10146 const xmlChar **lastgt) {
10147 const xmlChar *tmp;
10148
10149 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10150 xmlGenericError(xmlGenericErrorContext,
10151 "Internal error: xmlParseGetLasts\n");
10152 return;
10153 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010154 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010155 tmp = ctxt->input->end;
10156 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010157 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010158 if (tmp < ctxt->input->base) {
10159 *lastlt = NULL;
10160 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010161 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010162 *lastlt = tmp;
10163 tmp++;
10164 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10165 if (*tmp == '\'') {
10166 tmp++;
10167 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10168 if (tmp < ctxt->input->end) tmp++;
10169 } else if (*tmp == '"') {
10170 tmp++;
10171 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10172 if (tmp < ctxt->input->end) tmp++;
10173 } else
10174 tmp++;
10175 }
10176 if (tmp < ctxt->input->end)
10177 *lastgt = tmp;
10178 else {
10179 tmp = *lastlt;
10180 tmp--;
10181 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10182 if (tmp >= ctxt->input->base)
10183 *lastgt = tmp;
10184 else
10185 *lastgt = NULL;
10186 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010187 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010188 } else {
10189 *lastlt = NULL;
10190 *lastgt = NULL;
10191 }
10192}
10193/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010194 * xmlCheckCdataPush:
10195 * @cur: pointer to the bock of characters
10196 * @len: length of the block in bytes
10197 *
10198 * Check that the block of characters is okay as SCdata content [20]
10199 *
10200 * Returns the number of bytes to pass if okay, a negative index where an
10201 * UTF-8 error occured otherwise
10202 */
10203static int
10204xmlCheckCdataPush(const xmlChar *utf, int len) {
10205 int ix;
10206 unsigned char c;
10207 int codepoint;
10208
10209 if ((utf == NULL) || (len <= 0))
10210 return(0);
10211
10212 for (ix = 0; ix < len;) { /* string is 0-terminated */
10213 c = utf[ix];
10214 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10215 if (c >= 0x20)
10216 ix++;
10217 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10218 ix++;
10219 else
10220 return(-ix);
10221 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10222 if (ix + 2 > len) return(ix);
10223 if ((utf[ix+1] & 0xc0 ) != 0x80)
10224 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010225 codepoint = (utf[ix] & 0x1f) << 6;
10226 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010227 if (!xmlIsCharQ(codepoint))
10228 return(-ix);
10229 ix += 2;
10230 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10231 if (ix + 3 > len) return(ix);
10232 if (((utf[ix+1] & 0xc0) != 0x80) ||
10233 ((utf[ix+2] & 0xc0) != 0x80))
10234 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010235 codepoint = (utf[ix] & 0xf) << 12;
10236 codepoint |= (utf[ix+1] & 0x3f) << 6;
10237 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010238 if (!xmlIsCharQ(codepoint))
10239 return(-ix);
10240 ix += 3;
10241 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10242 if (ix + 4 > len) return(ix);
10243 if (((utf[ix+1] & 0xc0) != 0x80) ||
10244 ((utf[ix+2] & 0xc0) != 0x80) ||
10245 ((utf[ix+3] & 0xc0) != 0x80))
10246 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010247 codepoint = (utf[ix] & 0x7) << 18;
10248 codepoint |= (utf[ix+1] & 0x3f) << 12;
10249 codepoint |= (utf[ix+2] & 0x3f) << 6;
10250 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010251 if (!xmlIsCharQ(codepoint))
10252 return(-ix);
10253 ix += 4;
10254 } else /* unknown encoding */
10255 return(-ix);
10256 }
10257 return(ix);
10258}
10259
10260/**
Owen Taylor3473f882001-02-23 17:55:21 +000010261 * xmlParseTryOrFinish:
10262 * @ctxt: an XML parser context
10263 * @terminate: last chunk indicator
10264 *
10265 * Try to progress on parsing
10266 *
10267 * Returns zero if no parsing was possible
10268 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010269static int
Owen Taylor3473f882001-02-23 17:55:21 +000010270xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10271 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010272 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010273 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010274 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010275
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010276 if (ctxt->input == NULL)
10277 return(0);
10278
Owen Taylor3473f882001-02-23 17:55:21 +000010279#ifdef DEBUG_PUSH
10280 switch (ctxt->instate) {
10281 case XML_PARSER_EOF:
10282 xmlGenericError(xmlGenericErrorContext,
10283 "PP: try EOF\n"); break;
10284 case XML_PARSER_START:
10285 xmlGenericError(xmlGenericErrorContext,
10286 "PP: try START\n"); break;
10287 case XML_PARSER_MISC:
10288 xmlGenericError(xmlGenericErrorContext,
10289 "PP: try MISC\n");break;
10290 case XML_PARSER_COMMENT:
10291 xmlGenericError(xmlGenericErrorContext,
10292 "PP: try COMMENT\n");break;
10293 case XML_PARSER_PROLOG:
10294 xmlGenericError(xmlGenericErrorContext,
10295 "PP: try PROLOG\n");break;
10296 case XML_PARSER_START_TAG:
10297 xmlGenericError(xmlGenericErrorContext,
10298 "PP: try START_TAG\n");break;
10299 case XML_PARSER_CONTENT:
10300 xmlGenericError(xmlGenericErrorContext,
10301 "PP: try CONTENT\n");break;
10302 case XML_PARSER_CDATA_SECTION:
10303 xmlGenericError(xmlGenericErrorContext,
10304 "PP: try CDATA_SECTION\n");break;
10305 case XML_PARSER_END_TAG:
10306 xmlGenericError(xmlGenericErrorContext,
10307 "PP: try END_TAG\n");break;
10308 case XML_PARSER_ENTITY_DECL:
10309 xmlGenericError(xmlGenericErrorContext,
10310 "PP: try ENTITY_DECL\n");break;
10311 case XML_PARSER_ENTITY_VALUE:
10312 xmlGenericError(xmlGenericErrorContext,
10313 "PP: try ENTITY_VALUE\n");break;
10314 case XML_PARSER_ATTRIBUTE_VALUE:
10315 xmlGenericError(xmlGenericErrorContext,
10316 "PP: try ATTRIBUTE_VALUE\n");break;
10317 case XML_PARSER_DTD:
10318 xmlGenericError(xmlGenericErrorContext,
10319 "PP: try DTD\n");break;
10320 case XML_PARSER_EPILOG:
10321 xmlGenericError(xmlGenericErrorContext,
10322 "PP: try EPILOG\n");break;
10323 case XML_PARSER_PI:
10324 xmlGenericError(xmlGenericErrorContext,
10325 "PP: try PI\n");break;
10326 case XML_PARSER_IGNORE:
10327 xmlGenericError(xmlGenericErrorContext,
10328 "PP: try IGNORE\n");break;
10329 }
10330#endif
10331
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010332 if ((ctxt->input != NULL) &&
10333 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010334 xmlSHRINK(ctxt);
10335 ctxt->checkIndex = 0;
10336 }
10337 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010338
Daniel Veillarda880b122003-04-21 21:36:41 +000010339 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010340 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010341 return(0);
10342
10343
Owen Taylor3473f882001-02-23 17:55:21 +000010344 /*
10345 * Pop-up of finished entities.
10346 */
10347 while ((RAW == 0) && (ctxt->inputNr > 1))
10348 xmlPopInput(ctxt);
10349
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010350 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010351 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010352 avail = ctxt->input->length -
10353 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010354 else {
10355 /*
10356 * If we are operating on converted input, try to flush
10357 * remaining chars to avoid them stalling in the non-converted
10358 * buffer.
10359 */
10360 if ((ctxt->input->buf->raw != NULL) &&
10361 (ctxt->input->buf->raw->use > 0)) {
10362 int base = ctxt->input->base -
10363 ctxt->input->buf->buffer->content;
10364 int current = ctxt->input->cur - ctxt->input->base;
10365
10366 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10367 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10368 ctxt->input->cur = ctxt->input->base + current;
10369 ctxt->input->end =
10370 &ctxt->input->buf->buffer->content[
10371 ctxt->input->buf->buffer->use];
10372 }
10373 avail = ctxt->input->buf->buffer->use -
10374 (ctxt->input->cur - ctxt->input->base);
10375 }
Owen Taylor3473f882001-02-23 17:55:21 +000010376 if (avail < 1)
10377 goto done;
10378 switch (ctxt->instate) {
10379 case XML_PARSER_EOF:
10380 /*
10381 * Document parsing is done !
10382 */
10383 goto done;
10384 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010385 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10386 xmlChar start[4];
10387 xmlCharEncoding enc;
10388
10389 /*
10390 * Very first chars read from the document flow.
10391 */
10392 if (avail < 4)
10393 goto done;
10394
10395 /*
10396 * Get the 4 first bytes and decode the charset
10397 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010398 * plug some encoding conversion routines,
10399 * else xmlSwitchEncoding will set to (default)
10400 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010401 */
10402 start[0] = RAW;
10403 start[1] = NXT(1);
10404 start[2] = NXT(2);
10405 start[3] = NXT(3);
10406 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010407 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010408 break;
10409 }
Owen Taylor3473f882001-02-23 17:55:21 +000010410
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010411 if (avail < 2)
10412 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010413 cur = ctxt->input->cur[0];
10414 next = ctxt->input->cur[1];
10415 if (cur == 0) {
10416 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10417 ctxt->sax->setDocumentLocator(ctxt->userData,
10418 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010419 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010420 ctxt->instate = XML_PARSER_EOF;
10421#ifdef DEBUG_PUSH
10422 xmlGenericError(xmlGenericErrorContext,
10423 "PP: entering EOF\n");
10424#endif
10425 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10426 ctxt->sax->endDocument(ctxt->userData);
10427 goto done;
10428 }
10429 if ((cur == '<') && (next == '?')) {
10430 /* PI or XML decl */
10431 if (avail < 5) return(ret);
10432 if ((!terminate) &&
10433 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10434 return(ret);
10435 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10436 ctxt->sax->setDocumentLocator(ctxt->userData,
10437 &xmlDefaultSAXLocator);
10438 if ((ctxt->input->cur[2] == 'x') &&
10439 (ctxt->input->cur[3] == 'm') &&
10440 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010441 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010442 ret += 5;
10443#ifdef DEBUG_PUSH
10444 xmlGenericError(xmlGenericErrorContext,
10445 "PP: Parsing XML Decl\n");
10446#endif
10447 xmlParseXMLDecl(ctxt);
10448 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10449 /*
10450 * The XML REC instructs us to stop parsing right
10451 * here
10452 */
10453 ctxt->instate = XML_PARSER_EOF;
10454 return(0);
10455 }
10456 ctxt->standalone = ctxt->input->standalone;
10457 if ((ctxt->encoding == NULL) &&
10458 (ctxt->input->encoding != NULL))
10459 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10460 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10461 (!ctxt->disableSAX))
10462 ctxt->sax->startDocument(ctxt->userData);
10463 ctxt->instate = XML_PARSER_MISC;
10464#ifdef DEBUG_PUSH
10465 xmlGenericError(xmlGenericErrorContext,
10466 "PP: entering MISC\n");
10467#endif
10468 } else {
10469 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10470 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10471 (!ctxt->disableSAX))
10472 ctxt->sax->startDocument(ctxt->userData);
10473 ctxt->instate = XML_PARSER_MISC;
10474#ifdef DEBUG_PUSH
10475 xmlGenericError(xmlGenericErrorContext,
10476 "PP: entering MISC\n");
10477#endif
10478 }
10479 } else {
10480 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10481 ctxt->sax->setDocumentLocator(ctxt->userData,
10482 &xmlDefaultSAXLocator);
10483 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010484 if (ctxt->version == NULL) {
10485 xmlErrMemory(ctxt, NULL);
10486 break;
10487 }
Owen Taylor3473f882001-02-23 17:55:21 +000010488 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10489 (!ctxt->disableSAX))
10490 ctxt->sax->startDocument(ctxt->userData);
10491 ctxt->instate = XML_PARSER_MISC;
10492#ifdef DEBUG_PUSH
10493 xmlGenericError(xmlGenericErrorContext,
10494 "PP: entering MISC\n");
10495#endif
10496 }
10497 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010498 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010499 const xmlChar *name;
10500 const xmlChar *prefix;
10501 const xmlChar *URI;
10502 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010503
10504 if ((avail < 2) && (ctxt->inputNr == 1))
10505 goto done;
10506 cur = ctxt->input->cur[0];
10507 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010508 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010509 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010510 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10511 ctxt->sax->endDocument(ctxt->userData);
10512 goto done;
10513 }
10514 if (!terminate) {
10515 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010516 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010517 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010518 goto done;
10519 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10520 goto done;
10521 }
10522 }
10523 if (ctxt->spaceNr == 0)
10524 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010525 else if (*ctxt->space == -2)
10526 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010527 else
10528 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010529#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010530 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010531#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010532 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010533#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010534 else
10535 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010536#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010537 if (name == NULL) {
10538 spacePop(ctxt);
10539 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010540 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10541 ctxt->sax->endDocument(ctxt->userData);
10542 goto done;
10543 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010544#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010545 /*
10546 * [ VC: Root Element Type ]
10547 * The Name in the document type declaration must match
10548 * the element type of the root element.
10549 */
10550 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10551 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10552 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010553#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010554
10555 /*
10556 * Check for an Empty Element.
10557 */
10558 if ((RAW == '/') && (NXT(1) == '>')) {
10559 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010560
10561 if (ctxt->sax2) {
10562 if ((ctxt->sax != NULL) &&
10563 (ctxt->sax->endElementNs != NULL) &&
10564 (!ctxt->disableSAX))
10565 ctxt->sax->endElementNs(ctxt->userData, name,
10566 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010567 if (ctxt->nsNr - nsNr > 0)
10568 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010569#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010570 } else {
10571 if ((ctxt->sax != NULL) &&
10572 (ctxt->sax->endElement != NULL) &&
10573 (!ctxt->disableSAX))
10574 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010575#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010576 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010577 spacePop(ctxt);
10578 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010579 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010580 } else {
10581 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010582 }
10583 break;
10584 }
10585 if (RAW == '>') {
10586 NEXT;
10587 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010588 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010589 "Couldn't find end of Start Tag %s\n",
10590 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010591 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010592 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010593 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010594 if (ctxt->sax2)
10595 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010596#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010597 else
10598 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010599#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010600
Daniel Veillarda880b122003-04-21 21:36:41 +000010601 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010602 break;
10603 }
10604 case XML_PARSER_CONTENT: {
10605 const xmlChar *test;
10606 unsigned int cons;
10607 if ((avail < 2) && (ctxt->inputNr == 1))
10608 goto done;
10609 cur = ctxt->input->cur[0];
10610 next = ctxt->input->cur[1];
10611
10612 test = CUR_PTR;
10613 cons = ctxt->input->consumed;
10614 if ((cur == '<') && (next == '/')) {
10615 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010616 break;
10617 } else if ((cur == '<') && (next == '?')) {
10618 if ((!terminate) &&
10619 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10620 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010621 xmlParsePI(ctxt);
10622 } else if ((cur == '<') && (next != '!')) {
10623 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010624 break;
10625 } else if ((cur == '<') && (next == '!') &&
10626 (ctxt->input->cur[2] == '-') &&
10627 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010628 int term;
10629
10630 if (avail < 4)
10631 goto done;
10632 ctxt->input->cur += 4;
10633 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10634 ctxt->input->cur -= 4;
10635 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010636 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010637 xmlParseComment(ctxt);
10638 ctxt->instate = XML_PARSER_CONTENT;
10639 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10640 (ctxt->input->cur[2] == '[') &&
10641 (ctxt->input->cur[3] == 'C') &&
10642 (ctxt->input->cur[4] == 'D') &&
10643 (ctxt->input->cur[5] == 'A') &&
10644 (ctxt->input->cur[6] == 'T') &&
10645 (ctxt->input->cur[7] == 'A') &&
10646 (ctxt->input->cur[8] == '[')) {
10647 SKIP(9);
10648 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010649 break;
10650 } else if ((cur == '<') && (next == '!') &&
10651 (avail < 9)) {
10652 goto done;
10653 } else if (cur == '&') {
10654 if ((!terminate) &&
10655 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10656 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010657 xmlParseReference(ctxt);
10658 } else {
10659 /* TODO Avoid the extra copy, handle directly !!! */
10660 /*
10661 * Goal of the following test is:
10662 * - minimize calls to the SAX 'character' callback
10663 * when they are mergeable
10664 * - handle a problem for isBlank when we only parse
10665 * a sequence of blank chars and the next one is
10666 * not available to check against '<' presence.
10667 * - tries to homogenize the differences in SAX
10668 * callbacks between the push and pull versions
10669 * of the parser.
10670 */
10671 if ((ctxt->inputNr == 1) &&
10672 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10673 if (!terminate) {
10674 if (ctxt->progressive) {
10675 if ((lastlt == NULL) ||
10676 (ctxt->input->cur > lastlt))
10677 goto done;
10678 } else if (xmlParseLookupSequence(ctxt,
10679 '<', 0, 0) < 0) {
10680 goto done;
10681 }
10682 }
10683 }
10684 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010685 xmlParseCharData(ctxt, 0);
10686 }
10687 /*
10688 * Pop-up of finished entities.
10689 */
10690 while ((RAW == 0) && (ctxt->inputNr > 1))
10691 xmlPopInput(ctxt);
10692 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010693 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10694 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010695 ctxt->instate = XML_PARSER_EOF;
10696 break;
10697 }
10698 break;
10699 }
10700 case XML_PARSER_END_TAG:
10701 if (avail < 2)
10702 goto done;
10703 if (!terminate) {
10704 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010705 /* > can be found unescaped in attribute values */
10706 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010707 goto done;
10708 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10709 goto done;
10710 }
10711 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010712 if (ctxt->sax2) {
10713 xmlParseEndTag2(ctxt,
10714 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10715 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010716 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010717 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010718 }
10719#ifdef LIBXML_SAX1_ENABLED
10720 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010721 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010722#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010723 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010724 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010725 } else {
10726 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010727 }
10728 break;
10729 case XML_PARSER_CDATA_SECTION: {
10730 /*
10731 * The Push mode need to have the SAX callback for
10732 * cdataBlock merge back contiguous callbacks.
10733 */
10734 int base;
10735
10736 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10737 if (base < 0) {
10738 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010739 int tmp;
10740
10741 tmp = xmlCheckCdataPush(ctxt->input->cur,
10742 XML_PARSER_BIG_BUFFER_SIZE);
10743 if (tmp < 0) {
10744 tmp = -tmp;
10745 ctxt->input->cur += tmp;
10746 goto encoding_error;
10747 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010748 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10749 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010750 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010751 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010752 else if (ctxt->sax->characters != NULL)
10753 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010754 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010755 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010756 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010757 ctxt->checkIndex = 0;
10758 }
10759 goto done;
10760 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010761 int tmp;
10762
10763 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10764 if ((tmp < 0) || (tmp != base)) {
10765 tmp = -tmp;
10766 ctxt->input->cur += tmp;
10767 goto encoding_error;
10768 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010769 if ((ctxt->sax != NULL) && (base == 0) &&
10770 (ctxt->sax->cdataBlock != NULL) &&
10771 (!ctxt->disableSAX)) {
10772 /*
10773 * Special case to provide identical behaviour
10774 * between pull and push parsers on enpty CDATA
10775 * sections
10776 */
10777 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10778 (!strncmp((const char *)&ctxt->input->cur[-9],
10779 "<![CDATA[", 9)))
10780 ctxt->sax->cdataBlock(ctxt->userData,
10781 BAD_CAST "", 0);
10782 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010783 (!ctxt->disableSAX)) {
10784 if (ctxt->sax->cdataBlock != NULL)
10785 ctxt->sax->cdataBlock(ctxt->userData,
10786 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010787 else if (ctxt->sax->characters != NULL)
10788 ctxt->sax->characters(ctxt->userData,
10789 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010790 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010791 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010792 ctxt->checkIndex = 0;
10793 ctxt->instate = XML_PARSER_CONTENT;
10794#ifdef DEBUG_PUSH
10795 xmlGenericError(xmlGenericErrorContext,
10796 "PP: entering CONTENT\n");
10797#endif
10798 }
10799 break;
10800 }
Owen Taylor3473f882001-02-23 17:55:21 +000010801 case XML_PARSER_MISC:
10802 SKIP_BLANKS;
10803 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010804 avail = ctxt->input->length -
10805 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010806 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010807 avail = ctxt->input->buf->buffer->use -
10808 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010809 if (avail < 2)
10810 goto done;
10811 cur = ctxt->input->cur[0];
10812 next = ctxt->input->cur[1];
10813 if ((cur == '<') && (next == '?')) {
10814 if ((!terminate) &&
10815 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10816 goto done;
10817#ifdef DEBUG_PUSH
10818 xmlGenericError(xmlGenericErrorContext,
10819 "PP: Parsing PI\n");
10820#endif
10821 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010822 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010823 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010824 (ctxt->input->cur[2] == '-') &&
10825 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010826 if ((!terminate) &&
10827 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10828 goto done;
10829#ifdef DEBUG_PUSH
10830 xmlGenericError(xmlGenericErrorContext,
10831 "PP: Parsing Comment\n");
10832#endif
10833 xmlParseComment(ctxt);
10834 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010835 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010836 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010837 (ctxt->input->cur[2] == 'D') &&
10838 (ctxt->input->cur[3] == 'O') &&
10839 (ctxt->input->cur[4] == 'C') &&
10840 (ctxt->input->cur[5] == 'T') &&
10841 (ctxt->input->cur[6] == 'Y') &&
10842 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010843 (ctxt->input->cur[8] == 'E')) {
10844 if ((!terminate) &&
10845 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10846 goto done;
10847#ifdef DEBUG_PUSH
10848 xmlGenericError(xmlGenericErrorContext,
10849 "PP: Parsing internal subset\n");
10850#endif
10851 ctxt->inSubset = 1;
10852 xmlParseDocTypeDecl(ctxt);
10853 if (RAW == '[') {
10854 ctxt->instate = XML_PARSER_DTD;
10855#ifdef DEBUG_PUSH
10856 xmlGenericError(xmlGenericErrorContext,
10857 "PP: entering DTD\n");
10858#endif
10859 } else {
10860 /*
10861 * Create and update the external subset.
10862 */
10863 ctxt->inSubset = 2;
10864 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10865 (ctxt->sax->externalSubset != NULL))
10866 ctxt->sax->externalSubset(ctxt->userData,
10867 ctxt->intSubName, ctxt->extSubSystem,
10868 ctxt->extSubURI);
10869 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010870 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010871 ctxt->instate = XML_PARSER_PROLOG;
10872#ifdef DEBUG_PUSH
10873 xmlGenericError(xmlGenericErrorContext,
10874 "PP: entering PROLOG\n");
10875#endif
10876 }
10877 } else if ((cur == '<') && (next == '!') &&
10878 (avail < 9)) {
10879 goto done;
10880 } else {
10881 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010882 ctxt->progressive = 1;
10883 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010884#ifdef DEBUG_PUSH
10885 xmlGenericError(xmlGenericErrorContext,
10886 "PP: entering START_TAG\n");
10887#endif
10888 }
10889 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010890 case XML_PARSER_PROLOG:
10891 SKIP_BLANKS;
10892 if (ctxt->input->buf == NULL)
10893 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10894 else
10895 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10896 if (avail < 2)
10897 goto done;
10898 cur = ctxt->input->cur[0];
10899 next = ctxt->input->cur[1];
10900 if ((cur == '<') && (next == '?')) {
10901 if ((!terminate) &&
10902 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10903 goto done;
10904#ifdef DEBUG_PUSH
10905 xmlGenericError(xmlGenericErrorContext,
10906 "PP: Parsing PI\n");
10907#endif
10908 xmlParsePI(ctxt);
10909 } else if ((cur == '<') && (next == '!') &&
10910 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10911 if ((!terminate) &&
10912 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10913 goto done;
10914#ifdef DEBUG_PUSH
10915 xmlGenericError(xmlGenericErrorContext,
10916 "PP: Parsing Comment\n");
10917#endif
10918 xmlParseComment(ctxt);
10919 ctxt->instate = XML_PARSER_PROLOG;
10920 } else if ((cur == '<') && (next == '!') &&
10921 (avail < 4)) {
10922 goto done;
10923 } else {
10924 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010925 if (ctxt->progressive == 0)
10926 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010927 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010928#ifdef DEBUG_PUSH
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: entering START_TAG\n");
10931#endif
10932 }
10933 break;
10934 case XML_PARSER_EPILOG:
10935 SKIP_BLANKS;
10936 if (ctxt->input->buf == NULL)
10937 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10938 else
10939 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10940 if (avail < 2)
10941 goto done;
10942 cur = ctxt->input->cur[0];
10943 next = ctxt->input->cur[1];
10944 if ((cur == '<') && (next == '?')) {
10945 if ((!terminate) &&
10946 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10947 goto done;
10948#ifdef DEBUG_PUSH
10949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: Parsing PI\n");
10951#endif
10952 xmlParsePI(ctxt);
10953 ctxt->instate = XML_PARSER_EPILOG;
10954 } else if ((cur == '<') && (next == '!') &&
10955 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10956 if ((!terminate) &&
10957 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10958 goto done;
10959#ifdef DEBUG_PUSH
10960 xmlGenericError(xmlGenericErrorContext,
10961 "PP: Parsing Comment\n");
10962#endif
10963 xmlParseComment(ctxt);
10964 ctxt->instate = XML_PARSER_EPILOG;
10965 } else if ((cur == '<') && (next == '!') &&
10966 (avail < 4)) {
10967 goto done;
10968 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010969 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010970 ctxt->instate = XML_PARSER_EOF;
10971#ifdef DEBUG_PUSH
10972 xmlGenericError(xmlGenericErrorContext,
10973 "PP: entering EOF\n");
10974#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010975 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010976 ctxt->sax->endDocument(ctxt->userData);
10977 goto done;
10978 }
10979 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010980 case XML_PARSER_DTD: {
10981 /*
10982 * Sorry but progressive parsing of the internal subset
10983 * is not expected to be supported. We first check that
10984 * the full content of the internal subset is available and
10985 * the parsing is launched only at that point.
10986 * Internal subset ends up with "']' S? '>'" in an unescaped
10987 * section and not in a ']]>' sequence which are conditional
10988 * sections (whoever argued to keep that crap in XML deserve
10989 * a place in hell !).
10990 */
10991 int base, i;
10992 xmlChar *buf;
10993 xmlChar quote = 0;
10994
10995 base = ctxt->input->cur - ctxt->input->base;
10996 if (base < 0) return(0);
10997 if (ctxt->checkIndex > base)
10998 base = ctxt->checkIndex;
10999 buf = ctxt->input->buf->buffer->content;
11000 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11001 base++) {
11002 if (quote != 0) {
11003 if (buf[base] == quote)
11004 quote = 0;
11005 continue;
11006 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011007 if ((quote == 0) && (buf[base] == '<')) {
11008 int found = 0;
11009 /* special handling of comments */
11010 if (((unsigned int) base + 4 <
11011 ctxt->input->buf->buffer->use) &&
11012 (buf[base + 1] == '!') &&
11013 (buf[base + 2] == '-') &&
11014 (buf[base + 3] == '-')) {
11015 for (;(unsigned int) base + 3 <
11016 ctxt->input->buf->buffer->use; base++) {
11017 if ((buf[base] == '-') &&
11018 (buf[base + 1] == '-') &&
11019 (buf[base + 2] == '>')) {
11020 found = 1;
11021 base += 2;
11022 break;
11023 }
11024 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011025 if (!found) {
11026#if 0
11027 fprintf(stderr, "unfinished comment\n");
11028#endif
11029 break; /* for */
11030 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011031 continue;
11032 }
11033 }
Owen Taylor3473f882001-02-23 17:55:21 +000011034 if (buf[base] == '"') {
11035 quote = '"';
11036 continue;
11037 }
11038 if (buf[base] == '\'') {
11039 quote = '\'';
11040 continue;
11041 }
11042 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011043#if 0
11044 fprintf(stderr, "%c%c%c%c: ", buf[base],
11045 buf[base + 1], buf[base + 2], buf[base + 3]);
11046#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011047 if ((unsigned int) base +1 >=
11048 ctxt->input->buf->buffer->use)
11049 break;
11050 if (buf[base + 1] == ']') {
11051 /* conditional crap, skip both ']' ! */
11052 base++;
11053 continue;
11054 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011055 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011056 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11057 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011058 if (buf[base + i] == '>') {
11059#if 0
11060 fprintf(stderr, "found\n");
11061#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011062 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011063 }
11064 if (!IS_BLANK_CH(buf[base + i])) {
11065#if 0
11066 fprintf(stderr, "not found\n");
11067#endif
11068 goto not_end_of_int_subset;
11069 }
Owen Taylor3473f882001-02-23 17:55:21 +000011070 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011071#if 0
11072 fprintf(stderr, "end of stream\n");
11073#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011074 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011075
Owen Taylor3473f882001-02-23 17:55:21 +000011076 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011077not_end_of_int_subset:
11078 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011079 }
11080 /*
11081 * We didn't find the end of the Internal subset
11082 */
Owen Taylor3473f882001-02-23 17:55:21 +000011083#ifdef DEBUG_PUSH
11084 if (next == 0)
11085 xmlGenericError(xmlGenericErrorContext,
11086 "PP: lookup of int subset end filed\n");
11087#endif
11088 goto done;
11089
11090found_end_int_subset:
11091 xmlParseInternalSubset(ctxt);
11092 ctxt->inSubset = 2;
11093 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11094 (ctxt->sax->externalSubset != NULL))
11095 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11096 ctxt->extSubSystem, ctxt->extSubURI);
11097 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011098 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011099 ctxt->instate = XML_PARSER_PROLOG;
11100 ctxt->checkIndex = 0;
11101#ifdef DEBUG_PUSH
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: entering PROLOG\n");
11104#endif
11105 break;
11106 }
11107 case XML_PARSER_COMMENT:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: internal error, state == COMMENT\n");
11110 ctxt->instate = XML_PARSER_CONTENT;
11111#ifdef DEBUG_PUSH
11112 xmlGenericError(xmlGenericErrorContext,
11113 "PP: entering CONTENT\n");
11114#endif
11115 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011116 case XML_PARSER_IGNORE:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: internal error, state == IGNORE");
11119 ctxt->instate = XML_PARSER_DTD;
11120#ifdef DEBUG_PUSH
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: entering DTD\n");
11123#endif
11124 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011125 case XML_PARSER_PI:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: internal error, state == PI\n");
11128 ctxt->instate = XML_PARSER_CONTENT;
11129#ifdef DEBUG_PUSH
11130 xmlGenericError(xmlGenericErrorContext,
11131 "PP: entering CONTENT\n");
11132#endif
11133 break;
11134 case XML_PARSER_ENTITY_DECL:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: internal error, state == ENTITY_DECL\n");
11137 ctxt->instate = XML_PARSER_DTD;
11138#ifdef DEBUG_PUSH
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: entering DTD\n");
11141#endif
11142 break;
11143 case XML_PARSER_ENTITY_VALUE:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: internal error, state == ENTITY_VALUE\n");
11146 ctxt->instate = XML_PARSER_CONTENT;
11147#ifdef DEBUG_PUSH
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: entering DTD\n");
11150#endif
11151 break;
11152 case XML_PARSER_ATTRIBUTE_VALUE:
11153 xmlGenericError(xmlGenericErrorContext,
11154 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11155 ctxt->instate = XML_PARSER_START_TAG;
11156#ifdef DEBUG_PUSH
11157 xmlGenericError(xmlGenericErrorContext,
11158 "PP: entering START_TAG\n");
11159#endif
11160 break;
11161 case XML_PARSER_SYSTEM_LITERAL:
11162 xmlGenericError(xmlGenericErrorContext,
11163 "PP: internal error, state == SYSTEM_LITERAL\n");
11164 ctxt->instate = XML_PARSER_START_TAG;
11165#ifdef DEBUG_PUSH
11166 xmlGenericError(xmlGenericErrorContext,
11167 "PP: entering START_TAG\n");
11168#endif
11169 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011170 case XML_PARSER_PUBLIC_LITERAL:
11171 xmlGenericError(xmlGenericErrorContext,
11172 "PP: internal error, state == PUBLIC_LITERAL\n");
11173 ctxt->instate = XML_PARSER_START_TAG;
11174#ifdef DEBUG_PUSH
11175 xmlGenericError(xmlGenericErrorContext,
11176 "PP: entering START_TAG\n");
11177#endif
11178 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011179 }
11180 }
11181done:
11182#ifdef DEBUG_PUSH
11183 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11184#endif
11185 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011186encoding_error:
11187 {
11188 char buffer[150];
11189
11190 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11191 ctxt->input->cur[0], ctxt->input->cur[1],
11192 ctxt->input->cur[2], ctxt->input->cur[3]);
11193 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11194 "Input is not proper UTF-8, indicate encoding !\n%s",
11195 BAD_CAST buffer, NULL);
11196 }
11197 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011198}
11199
11200/**
Owen Taylor3473f882001-02-23 17:55:21 +000011201 * xmlParseChunk:
11202 * @ctxt: an XML parser context
11203 * @chunk: an char array
11204 * @size: the size in byte of the chunk
11205 * @terminate: last chunk indicator
11206 *
11207 * Parse a Chunk of memory
11208 *
11209 * Returns zero if no error, the xmlParserErrors otherwise.
11210 */
11211int
11212xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11213 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011214 int end_in_lf = 0;
11215
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011216 if (ctxt == NULL)
11217 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011218 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011219 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011220 if (ctxt->instate == XML_PARSER_START)
11221 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011222 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11223 (chunk[size - 1] == '\r')) {
11224 end_in_lf = 1;
11225 size--;
11226 }
Owen Taylor3473f882001-02-23 17:55:21 +000011227 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11228 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11229 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11230 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011231 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011232
William M. Bracka3215c72004-07-31 16:24:01 +000011233 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11234 if (res < 0) {
11235 ctxt->errNo = XML_PARSER_EOF;
11236 ctxt->disableSAX = 1;
11237 return (XML_PARSER_EOF);
11238 }
Owen Taylor3473f882001-02-23 17:55:21 +000011239 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11240 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011241 ctxt->input->end =
11242 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011243#ifdef DEBUG_PUSH
11244 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11245#endif
11246
Owen Taylor3473f882001-02-23 17:55:21 +000011247 } else if (ctxt->instate != XML_PARSER_EOF) {
11248 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11249 xmlParserInputBufferPtr in = ctxt->input->buf;
11250 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11251 (in->raw != NULL)) {
11252 int nbchars;
11253
11254 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11255 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011256 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011257 xmlGenericError(xmlGenericErrorContext,
11258 "xmlParseChunk: encoder error\n");
11259 return(XML_ERR_INVALID_ENCODING);
11260 }
11261 }
11262 }
11263 }
11264 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011265 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11266 (ctxt->input->buf != NULL)) {
11267 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11268 }
Daniel Veillard14412512005-01-21 23:53:26 +000011269 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011270 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011271 if (terminate) {
11272 /*
11273 * Check for termination
11274 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011275 int avail = 0;
11276
11277 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011278 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011279 avail = ctxt->input->length -
11280 (ctxt->input->cur - ctxt->input->base);
11281 else
11282 avail = ctxt->input->buf->buffer->use -
11283 (ctxt->input->cur - ctxt->input->base);
11284 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011285
Owen Taylor3473f882001-02-23 17:55:21 +000011286 if ((ctxt->instate != XML_PARSER_EOF) &&
11287 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011288 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011289 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011290 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011291 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011292 }
Owen Taylor3473f882001-02-23 17:55:21 +000011293 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011294 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011295 ctxt->sax->endDocument(ctxt->userData);
11296 }
11297 ctxt->instate = XML_PARSER_EOF;
11298 }
11299 return((xmlParserErrors) ctxt->errNo);
11300}
11301
11302/************************************************************************
11303 * *
11304 * I/O front end functions to the parser *
11305 * *
11306 ************************************************************************/
11307
11308/**
Owen Taylor3473f882001-02-23 17:55:21 +000011309 * xmlCreatePushParserCtxt:
11310 * @sax: a SAX handler
11311 * @user_data: The user data returned on SAX callbacks
11312 * @chunk: a pointer to an array of chars
11313 * @size: number of chars in the array
11314 * @filename: an optional file name or URI
11315 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011316 * Create a parser context for using the XML parser in push mode.
11317 * If @buffer and @size are non-NULL, the data is used to detect
11318 * the encoding. The remaining characters will be parsed so they
11319 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011320 * To allow content encoding detection, @size should be >= 4
11321 * The value of @filename is used for fetching external entities
11322 * and error/warning reports.
11323 *
11324 * Returns the new parser context or NULL
11325 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011326
Owen Taylor3473f882001-02-23 17:55:21 +000011327xmlParserCtxtPtr
11328xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11329 const char *chunk, int size, const char *filename) {
11330 xmlParserCtxtPtr ctxt;
11331 xmlParserInputPtr inputStream;
11332 xmlParserInputBufferPtr buf;
11333 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11334
11335 /*
11336 * plug some encoding conversion routines
11337 */
11338 if ((chunk != NULL) && (size >= 4))
11339 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11340
11341 buf = xmlAllocParserInputBuffer(enc);
11342 if (buf == NULL) return(NULL);
11343
11344 ctxt = xmlNewParserCtxt();
11345 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011346 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011347 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011348 return(NULL);
11349 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011350 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011351 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11352 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011353 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011354 xmlFreeParserInputBuffer(buf);
11355 xmlFreeParserCtxt(ctxt);
11356 return(NULL);
11357 }
Owen Taylor3473f882001-02-23 17:55:21 +000011358 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011359#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011360 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011361#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011362 xmlFree(ctxt->sax);
11363 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11364 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011365 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011366 xmlFreeParserInputBuffer(buf);
11367 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011368 return(NULL);
11369 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011370 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11371 if (sax->initialized == XML_SAX2_MAGIC)
11372 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11373 else
11374 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011375 if (user_data != NULL)
11376 ctxt->userData = user_data;
11377 }
11378 if (filename == NULL) {
11379 ctxt->directory = NULL;
11380 } else {
11381 ctxt->directory = xmlParserGetDirectory(filename);
11382 }
11383
11384 inputStream = xmlNewInputStream(ctxt);
11385 if (inputStream == NULL) {
11386 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011387 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011388 return(NULL);
11389 }
11390
11391 if (filename == NULL)
11392 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011393 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011394 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011395 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011396 if (inputStream->filename == NULL) {
11397 xmlFreeParserCtxt(ctxt);
11398 xmlFreeParserInputBuffer(buf);
11399 return(NULL);
11400 }
11401 }
Owen Taylor3473f882001-02-23 17:55:21 +000011402 inputStream->buf = buf;
11403 inputStream->base = inputStream->buf->buffer->content;
11404 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011405 inputStream->end =
11406 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011407
11408 inputPush(ctxt, inputStream);
11409
William M. Brack3a1cd212005-02-11 14:35:54 +000011410 /*
11411 * If the caller didn't provide an initial 'chunk' for determining
11412 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11413 * that it can be automatically determined later
11414 */
11415 if ((size == 0) || (chunk == NULL)) {
11416 ctxt->charset = XML_CHAR_ENCODING_NONE;
11417 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011418 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11419 int cur = ctxt->input->cur - ctxt->input->base;
11420
Owen Taylor3473f882001-02-23 17:55:21 +000011421 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011422
11423 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11424 ctxt->input->cur = ctxt->input->base + cur;
11425 ctxt->input->end =
11426 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011427#ifdef DEBUG_PUSH
11428 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11429#endif
11430 }
11431
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011432 if (enc != XML_CHAR_ENCODING_NONE) {
11433 xmlSwitchEncoding(ctxt, enc);
11434 }
11435
Owen Taylor3473f882001-02-23 17:55:21 +000011436 return(ctxt);
11437}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011438#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011439
11440/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011441 * xmlStopParser:
11442 * @ctxt: an XML parser context
11443 *
11444 * Blocks further parser processing
11445 */
11446void
11447xmlStopParser(xmlParserCtxtPtr ctxt) {
11448 if (ctxt == NULL)
11449 return;
11450 ctxt->instate = XML_PARSER_EOF;
11451 ctxt->disableSAX = 1;
11452 if (ctxt->input != NULL) {
11453 ctxt->input->cur = BAD_CAST"";
11454 ctxt->input->base = ctxt->input->cur;
11455 }
11456}
11457
11458/**
Owen Taylor3473f882001-02-23 17:55:21 +000011459 * xmlCreateIOParserCtxt:
11460 * @sax: a SAX handler
11461 * @user_data: The user data returned on SAX callbacks
11462 * @ioread: an I/O read function
11463 * @ioclose: an I/O close function
11464 * @ioctx: an I/O handler
11465 * @enc: the charset encoding if known
11466 *
11467 * Create a parser context for using the XML parser with an existing
11468 * I/O stream
11469 *
11470 * Returns the new parser context or NULL
11471 */
11472xmlParserCtxtPtr
11473xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11474 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11475 void *ioctx, xmlCharEncoding enc) {
11476 xmlParserCtxtPtr ctxt;
11477 xmlParserInputPtr inputStream;
11478 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011479
11480 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011481
11482 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11483 if (buf == NULL) return(NULL);
11484
11485 ctxt = xmlNewParserCtxt();
11486 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011487 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011488 return(NULL);
11489 }
11490 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011491#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011492 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011493#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011494 xmlFree(ctxt->sax);
11495 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11496 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011497 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011498 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011499 return(NULL);
11500 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011501 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11502 if (sax->initialized == XML_SAX2_MAGIC)
11503 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11504 else
11505 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011506 if (user_data != NULL)
11507 ctxt->userData = user_data;
11508 }
11509
11510 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11511 if (inputStream == NULL) {
11512 xmlFreeParserCtxt(ctxt);
11513 return(NULL);
11514 }
11515 inputPush(ctxt, inputStream);
11516
11517 return(ctxt);
11518}
11519
Daniel Veillard4432df22003-09-28 18:58:27 +000011520#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011521/************************************************************************
11522 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011523 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011524 * *
11525 ************************************************************************/
11526
11527/**
11528 * xmlIOParseDTD:
11529 * @sax: the SAX handler block or NULL
11530 * @input: an Input Buffer
11531 * @enc: the charset encoding if known
11532 *
11533 * Load and parse a DTD
11534 *
11535 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011536 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011537 */
11538
11539xmlDtdPtr
11540xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11541 xmlCharEncoding enc) {
11542 xmlDtdPtr ret = NULL;
11543 xmlParserCtxtPtr ctxt;
11544 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011545 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011546
11547 if (input == NULL)
11548 return(NULL);
11549
11550 ctxt = xmlNewParserCtxt();
11551 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011552 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011553 return(NULL);
11554 }
11555
11556 /*
11557 * Set-up the SAX context
11558 */
11559 if (sax != NULL) {
11560 if (ctxt->sax != NULL)
11561 xmlFree(ctxt->sax);
11562 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011563 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011564 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011565 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011566
11567 /*
11568 * generate a parser input from the I/O handler
11569 */
11570
Daniel Veillard43caefb2003-12-07 19:32:22 +000011571 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011572 if (pinput == NULL) {
11573 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011574 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011575 xmlFreeParserCtxt(ctxt);
11576 return(NULL);
11577 }
11578
11579 /*
11580 * plug some encoding conversion routines here.
11581 */
11582 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011583 if (enc != XML_CHAR_ENCODING_NONE) {
11584 xmlSwitchEncoding(ctxt, enc);
11585 }
Owen Taylor3473f882001-02-23 17:55:21 +000011586
11587 pinput->filename = NULL;
11588 pinput->line = 1;
11589 pinput->col = 1;
11590 pinput->base = ctxt->input->cur;
11591 pinput->cur = ctxt->input->cur;
11592 pinput->free = NULL;
11593
11594 /*
11595 * let's parse that entity knowing it's an external subset.
11596 */
11597 ctxt->inSubset = 2;
11598 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011599 if (ctxt->myDoc == NULL) {
11600 xmlErrMemory(ctxt, "New Doc failed");
11601 return(NULL);
11602 }
11603 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011604 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11605 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011606
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011607 if ((enc == XML_CHAR_ENCODING_NONE) &&
11608 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011609 /*
11610 * Get the 4 first bytes and decode the charset
11611 * if enc != XML_CHAR_ENCODING_NONE
11612 * plug some encoding conversion routines.
11613 */
11614 start[0] = RAW;
11615 start[1] = NXT(1);
11616 start[2] = NXT(2);
11617 start[3] = NXT(3);
11618 enc = xmlDetectCharEncoding(start, 4);
11619 if (enc != XML_CHAR_ENCODING_NONE) {
11620 xmlSwitchEncoding(ctxt, enc);
11621 }
11622 }
11623
Owen Taylor3473f882001-02-23 17:55:21 +000011624 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11625
11626 if (ctxt->myDoc != NULL) {
11627 if (ctxt->wellFormed) {
11628 ret = ctxt->myDoc->extSubset;
11629 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011630 if (ret != NULL) {
11631 xmlNodePtr tmp;
11632
11633 ret->doc = NULL;
11634 tmp = ret->children;
11635 while (tmp != NULL) {
11636 tmp->doc = NULL;
11637 tmp = tmp->next;
11638 }
11639 }
Owen Taylor3473f882001-02-23 17:55:21 +000011640 } else {
11641 ret = NULL;
11642 }
11643 xmlFreeDoc(ctxt->myDoc);
11644 ctxt->myDoc = NULL;
11645 }
11646 if (sax != NULL) ctxt->sax = NULL;
11647 xmlFreeParserCtxt(ctxt);
11648
11649 return(ret);
11650}
11651
11652/**
11653 * xmlSAXParseDTD:
11654 * @sax: the SAX handler block
11655 * @ExternalID: a NAME* containing the External ID of the DTD
11656 * @SystemID: a NAME* containing the URL to the DTD
11657 *
11658 * Load and parse an external subset.
11659 *
11660 * Returns the resulting xmlDtdPtr or NULL in case of error.
11661 */
11662
11663xmlDtdPtr
11664xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11665 const xmlChar *SystemID) {
11666 xmlDtdPtr ret = NULL;
11667 xmlParserCtxtPtr ctxt;
11668 xmlParserInputPtr input = NULL;
11669 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011670 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011671
11672 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11673
11674 ctxt = xmlNewParserCtxt();
11675 if (ctxt == NULL) {
11676 return(NULL);
11677 }
11678
11679 /*
11680 * Set-up the SAX context
11681 */
11682 if (sax != NULL) {
11683 if (ctxt->sax != NULL)
11684 xmlFree(ctxt->sax);
11685 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011686 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011687 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011688
11689 /*
11690 * Canonicalise the system ID
11691 */
11692 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011693 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011694 xmlFreeParserCtxt(ctxt);
11695 return(NULL);
11696 }
Owen Taylor3473f882001-02-23 17:55:21 +000011697
11698 /*
11699 * Ask the Entity resolver to load the damn thing
11700 */
11701
11702 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011703 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11704 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011705 if (input == NULL) {
11706 if (sax != NULL) ctxt->sax = NULL;
11707 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011708 if (systemIdCanonic != NULL)
11709 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011710 return(NULL);
11711 }
11712
11713 /*
11714 * plug some encoding conversion routines here.
11715 */
11716 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011717 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11718 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11719 xmlSwitchEncoding(ctxt, enc);
11720 }
Owen Taylor3473f882001-02-23 17:55:21 +000011721
11722 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011723 input->filename = (char *) systemIdCanonic;
11724 else
11725 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011726 input->line = 1;
11727 input->col = 1;
11728 input->base = ctxt->input->cur;
11729 input->cur = ctxt->input->cur;
11730 input->free = NULL;
11731
11732 /*
11733 * let's parse that entity knowing it's an external subset.
11734 */
11735 ctxt->inSubset = 2;
11736 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011737 if (ctxt->myDoc == NULL) {
11738 xmlErrMemory(ctxt, "New Doc failed");
11739 if (sax != NULL) ctxt->sax = NULL;
11740 xmlFreeParserCtxt(ctxt);
11741 return(NULL);
11742 }
11743 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011744 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11745 ExternalID, SystemID);
11746 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11747
11748 if (ctxt->myDoc != NULL) {
11749 if (ctxt->wellFormed) {
11750 ret = ctxt->myDoc->extSubset;
11751 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011752 if (ret != NULL) {
11753 xmlNodePtr tmp;
11754
11755 ret->doc = NULL;
11756 tmp = ret->children;
11757 while (tmp != NULL) {
11758 tmp->doc = NULL;
11759 tmp = tmp->next;
11760 }
11761 }
Owen Taylor3473f882001-02-23 17:55:21 +000011762 } else {
11763 ret = NULL;
11764 }
11765 xmlFreeDoc(ctxt->myDoc);
11766 ctxt->myDoc = NULL;
11767 }
11768 if (sax != NULL) ctxt->sax = NULL;
11769 xmlFreeParserCtxt(ctxt);
11770
11771 return(ret);
11772}
11773
Daniel Veillard4432df22003-09-28 18:58:27 +000011774
Owen Taylor3473f882001-02-23 17:55:21 +000011775/**
11776 * xmlParseDTD:
11777 * @ExternalID: a NAME* containing the External ID of the DTD
11778 * @SystemID: a NAME* containing the URL to the DTD
11779 *
11780 * Load and parse an external subset.
11781 *
11782 * Returns the resulting xmlDtdPtr or NULL in case of error.
11783 */
11784
11785xmlDtdPtr
11786xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11787 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11788}
Daniel Veillard4432df22003-09-28 18:58:27 +000011789#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011790
11791/************************************************************************
11792 * *
11793 * Front ends when parsing an Entity *
11794 * *
11795 ************************************************************************/
11796
11797/**
Owen Taylor3473f882001-02-23 17:55:21 +000011798 * xmlParseCtxtExternalEntity:
11799 * @ctx: the existing parsing context
11800 * @URL: the URL for the entity to load
11801 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011802 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011803 *
11804 * Parse an external general entity within an existing parsing context
11805 * An external general parsed entity is well-formed if it matches the
11806 * production labeled extParsedEnt.
11807 *
11808 * [78] extParsedEnt ::= TextDecl? content
11809 *
11810 * Returns 0 if the entity is well formed, -1 in case of args problem and
11811 * the parser error code otherwise
11812 */
11813
11814int
11815xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011816 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011817 xmlParserCtxtPtr ctxt;
11818 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011819 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011820 xmlSAXHandlerPtr oldsax = NULL;
11821 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011822 xmlChar start[4];
11823 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011824 xmlParserInputPtr inputStream;
11825 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011826
Daniel Veillardce682bc2004-11-05 17:22:25 +000011827 if (ctx == NULL) return(-1);
11828
Daniel Veillard4bf899b2008-08-20 17:04:30 +000011829 if ((ctx->depth > 40) || (ctx->nbentities >= 500000)) {
Owen Taylor3473f882001-02-23 17:55:21 +000011830 return(XML_ERR_ENTITY_LOOP);
11831 }
11832
Daniel Veillardcda96922001-08-21 10:56:31 +000011833 if (lst != NULL)
11834 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011835 if ((URL == NULL) && (ID == NULL))
11836 return(-1);
11837 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11838 return(-1);
11839
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011840 ctxt = xmlNewParserCtxt();
11841 if (ctxt == NULL) {
11842 return(-1);
11843 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011844
Owen Taylor3473f882001-02-23 17:55:21 +000011845 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011846 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011847
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011848 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11849 if (inputStream == NULL) {
11850 xmlFreeParserCtxt(ctxt);
11851 return(-1);
11852 }
11853
11854 inputPush(ctxt, inputStream);
11855
11856 if ((ctxt->directory == NULL) && (directory == NULL))
11857 directory = xmlParserGetDirectory((char *)URL);
11858 if ((ctxt->directory == NULL) && (directory != NULL))
11859 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011860
Owen Taylor3473f882001-02-23 17:55:21 +000011861 oldsax = ctxt->sax;
11862 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011863 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011864 newDoc = xmlNewDoc(BAD_CAST "1.0");
11865 if (newDoc == NULL) {
11866 xmlFreeParserCtxt(ctxt);
11867 return(-1);
11868 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000011869 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011870 if (ctx->myDoc->dict) {
11871 newDoc->dict = ctx->myDoc->dict;
11872 xmlDictReference(newDoc->dict);
11873 }
Owen Taylor3473f882001-02-23 17:55:21 +000011874 if (ctx->myDoc != NULL) {
11875 newDoc->intSubset = ctx->myDoc->intSubset;
11876 newDoc->extSubset = ctx->myDoc->extSubset;
11877 }
11878 if (ctx->myDoc->URL != NULL) {
11879 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11880 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011881 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11882 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011883 ctxt->sax = oldsax;
11884 xmlFreeParserCtxt(ctxt);
11885 newDoc->intSubset = NULL;
11886 newDoc->extSubset = NULL;
11887 xmlFreeDoc(newDoc);
11888 return(-1);
11889 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011890 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011891 nodePush(ctxt, newDoc->children);
11892 if (ctx->myDoc == NULL) {
11893 ctxt->myDoc = newDoc;
11894 } else {
11895 ctxt->myDoc = ctx->myDoc;
11896 newDoc->children->doc = ctx->myDoc;
11897 }
11898
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011899 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000011900 * Get the 4 first bytes and decode the charset
11901 * if enc != XML_CHAR_ENCODING_NONE
11902 * plug some encoding conversion routines.
11903 */
11904 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011905 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11906 start[0] = RAW;
11907 start[1] = NXT(1);
11908 start[2] = NXT(2);
11909 start[3] = NXT(3);
11910 enc = xmlDetectCharEncoding(start, 4);
11911 if (enc != XML_CHAR_ENCODING_NONE) {
11912 xmlSwitchEncoding(ctxt, enc);
11913 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011914 }
11915
Owen Taylor3473f882001-02-23 17:55:21 +000011916 /*
11917 * Parse a possible text declaration first
11918 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011919 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011920 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011921 /*
11922 * An XML-1.0 document can't reference an entity not XML-1.0
11923 */
11924 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
11925 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11926 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11927 "Version mismatch between document and entity\n");
11928 }
Owen Taylor3473f882001-02-23 17:55:21 +000011929 }
11930
11931 /*
11932 * Doing validity checking on chunk doesn't make sense
11933 */
11934 ctxt->instate = XML_PARSER_CONTENT;
11935 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011936 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011937 ctxt->loadsubset = ctx->loadsubset;
11938 ctxt->depth = ctx->depth + 1;
11939 ctxt->replaceEntities = ctx->replaceEntities;
11940 if (ctxt->validate) {
11941 ctxt->vctxt.error = ctx->vctxt.error;
11942 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011943 } else {
11944 ctxt->vctxt.error = NULL;
11945 ctxt->vctxt.warning = NULL;
11946 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011947 ctxt->vctxt.nodeTab = NULL;
11948 ctxt->vctxt.nodeNr = 0;
11949 ctxt->vctxt.nodeMax = 0;
11950 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011951 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11952 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011953 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11954 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11955 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011956 ctxt->dictNames = ctx->dictNames;
11957 ctxt->attsDefault = ctx->attsDefault;
11958 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011959 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011960
11961 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011962
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011963 ctx->validate = ctxt->validate;
11964 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011965 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011966 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011967 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011968 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011969 }
11970 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011971 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011972 }
11973
11974 if (!ctxt->wellFormed) {
11975 if (ctxt->errNo == 0)
11976 ret = 1;
11977 else
11978 ret = ctxt->errNo;
11979 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011980 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011981 xmlNodePtr cur;
11982
11983 /*
11984 * Return the newly created nodeset after unlinking it from
11985 * they pseudo parent.
11986 */
11987 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011988 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011989 while (cur != NULL) {
11990 cur->parent = NULL;
11991 cur = cur->next;
11992 }
11993 newDoc->children->children = NULL;
11994 }
11995 ret = 0;
11996 }
11997 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011998 ctxt->dict = NULL;
11999 ctxt->attsDefault = NULL;
12000 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012001 xmlFreeParserCtxt(ctxt);
12002 newDoc->intSubset = NULL;
12003 newDoc->extSubset = NULL;
12004 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012005
Owen Taylor3473f882001-02-23 17:55:21 +000012006 return(ret);
12007}
12008
12009/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012010 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012011 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012012 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012013 * @sax: the SAX handler bloc (possibly NULL)
12014 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12015 * @depth: Used for loop detection, use 0
12016 * @URL: the URL for the entity to load
12017 * @ID: the System ID for the entity to load
12018 * @list: the return value for the set of parsed nodes
12019 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012020 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012021 *
12022 * Returns 0 if the entity is well formed, -1 in case of args problem and
12023 * the parser error code otherwise
12024 */
12025
Daniel Veillard7d515752003-09-26 19:12:37 +000012026static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012027xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12028 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012029 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012030 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012031 xmlParserCtxtPtr ctxt;
12032 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012033 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012034 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012035 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012036 xmlChar start[4];
12037 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012038
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012039 if ((depth > 40) ||
12040 ((oldctxt != NULL) && (oldctxt->nbentities >= 500000))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012041 return(XML_ERR_ENTITY_LOOP);
12042 }
12043
12044
12045
12046 if (list != NULL)
12047 *list = NULL;
12048 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012049 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012050 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012051 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012052
12053
12054 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012055 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012056 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012057 if (oldctxt != NULL) {
12058 ctxt->_private = oldctxt->_private;
12059 ctxt->loadsubset = oldctxt->loadsubset;
12060 ctxt->validate = oldctxt->validate;
12061 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012062 ctxt->record_info = oldctxt->record_info;
12063 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12064 ctxt->node_seq.length = oldctxt->node_seq.length;
12065 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012066 } else {
12067 /*
12068 * Doing validity checking on chunk without context
12069 * doesn't make sense
12070 */
12071 ctxt->_private = NULL;
12072 ctxt->validate = 0;
12073 ctxt->external = 2;
12074 ctxt->loadsubset = 0;
12075 }
Owen Taylor3473f882001-02-23 17:55:21 +000012076 if (sax != NULL) {
12077 oldsax = ctxt->sax;
12078 ctxt->sax = sax;
12079 if (user_data != NULL)
12080 ctxt->userData = user_data;
12081 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012082 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012083 newDoc = xmlNewDoc(BAD_CAST "1.0");
12084 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012085 ctxt->node_seq.maximum = 0;
12086 ctxt->node_seq.length = 0;
12087 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012088 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012089 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012090 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012091 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012092 newDoc->intSubset = doc->intSubset;
12093 newDoc->extSubset = doc->extSubset;
12094 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012095 xmlDictReference(newDoc->dict);
12096
Owen Taylor3473f882001-02-23 17:55:21 +000012097 if (doc->URL != NULL) {
12098 newDoc->URL = xmlStrdup(doc->URL);
12099 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012100 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12101 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012102 if (sax != NULL)
12103 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012104 ctxt->node_seq.maximum = 0;
12105 ctxt->node_seq.length = 0;
12106 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012107 xmlFreeParserCtxt(ctxt);
12108 newDoc->intSubset = NULL;
12109 newDoc->extSubset = NULL;
12110 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012111 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012112 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012113 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012114 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012115 ctxt->myDoc = doc;
12116 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012117
Daniel Veillard87a764e2001-06-20 17:41:10 +000012118 /*
12119 * Get the 4 first bytes and decode the charset
12120 * if enc != XML_CHAR_ENCODING_NONE
12121 * plug some encoding conversion routines.
12122 */
12123 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012124 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12125 start[0] = RAW;
12126 start[1] = NXT(1);
12127 start[2] = NXT(2);
12128 start[3] = NXT(3);
12129 enc = xmlDetectCharEncoding(start, 4);
12130 if (enc != XML_CHAR_ENCODING_NONE) {
12131 xmlSwitchEncoding(ctxt, enc);
12132 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012133 }
12134
Owen Taylor3473f882001-02-23 17:55:21 +000012135 /*
12136 * Parse a possible text declaration first
12137 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012138 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012139 xmlParseTextDecl(ctxt);
12140 }
12141
Owen Taylor3473f882001-02-23 17:55:21 +000012142 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012143 ctxt->depth = depth;
12144
12145 xmlParseContent(ctxt);
12146
Daniel Veillard561b7f82002-03-20 21:55:57 +000012147 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012148 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012149 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012150 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012151 }
12152 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012153 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012154 }
12155
12156 if (!ctxt->wellFormed) {
12157 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012158 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012159 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012160 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012161 } else {
12162 if (list != NULL) {
12163 xmlNodePtr cur;
12164
12165 /*
12166 * Return the newly created nodeset after unlinking it from
12167 * they pseudo parent.
12168 */
12169 cur = newDoc->children->children;
12170 *list = cur;
12171 while (cur != NULL) {
12172 cur->parent = NULL;
12173 cur = cur->next;
12174 }
12175 newDoc->children->children = NULL;
12176 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012177 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012178 }
12179 if (sax != NULL)
12180 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012181 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12182 oldctxt->node_seq.length = ctxt->node_seq.length;
12183 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012184 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012185 ctxt->node_seq.maximum = 0;
12186 ctxt->node_seq.length = 0;
12187 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012188 xmlFreeParserCtxt(ctxt);
12189 newDoc->intSubset = NULL;
12190 newDoc->extSubset = NULL;
12191 xmlFreeDoc(newDoc);
12192
12193 return(ret);
12194}
12195
Daniel Veillard81273902003-09-30 00:43:48 +000012196#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012197/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012198 * xmlParseExternalEntity:
12199 * @doc: the document the chunk pertains to
12200 * @sax: the SAX handler bloc (possibly NULL)
12201 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12202 * @depth: Used for loop detection, use 0
12203 * @URL: the URL for the entity to load
12204 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012205 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012206 *
12207 * Parse an external general entity
12208 * An external general parsed entity is well-formed if it matches the
12209 * production labeled extParsedEnt.
12210 *
12211 * [78] extParsedEnt ::= TextDecl? content
12212 *
12213 * Returns 0 if the entity is well formed, -1 in case of args problem and
12214 * the parser error code otherwise
12215 */
12216
12217int
12218xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012219 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012220 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012221 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012222}
12223
12224/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012225 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012226 * @doc: the document the chunk pertains to
12227 * @sax: the SAX handler bloc (possibly NULL)
12228 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12229 * @depth: Used for loop detection, use 0
12230 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012231 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012232 *
12233 * Parse a well-balanced chunk of an XML document
12234 * called by the parser
12235 * The allowed sequence for the Well Balanced Chunk is the one defined by
12236 * the content production in the XML grammar:
12237 *
12238 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12239 *
12240 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12241 * the parser error code otherwise
12242 */
12243
12244int
12245xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012246 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012247 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12248 depth, string, lst, 0 );
12249}
Daniel Veillard81273902003-09-30 00:43:48 +000012250#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012251
12252/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012253 * xmlParseBalancedChunkMemoryInternal:
12254 * @oldctxt: the existing parsing context
12255 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12256 * @user_data: the user data field for the parser context
12257 * @lst: the return value for the set of parsed nodes
12258 *
12259 *
12260 * Parse a well-balanced chunk of an XML document
12261 * called by the parser
12262 * The allowed sequence for the Well Balanced Chunk is the one defined by
12263 * the content production in the XML grammar:
12264 *
12265 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12266 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012267 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12268 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000012269 *
12270 * In case recover is set to 1, the nodelist will not be empty even if
12271 * the parsed chunk is not well balanced.
12272 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012273static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012274xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12275 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12276 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012277 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012278 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012279 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012280 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012281 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012282 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012283 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012284
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012285 if ((oldctxt->depth > 40) || (oldctxt->nbentities >= 500000)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012286 return(XML_ERR_ENTITY_LOOP);
12287 }
12288
12289
12290 if (lst != NULL)
12291 *lst = NULL;
12292 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012293 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012294
12295 size = xmlStrlen(string);
12296
12297 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012298 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012299 if (user_data != NULL)
12300 ctxt->userData = user_data;
12301 else
12302 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012303 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12304 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012305 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12306 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12307 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012308
12309 oldsax = ctxt->sax;
12310 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012311 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012312 ctxt->replaceEntities = oldctxt->replaceEntities;
12313 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012314
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012315 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012316 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012317 newDoc = xmlNewDoc(BAD_CAST "1.0");
12318 if (newDoc == NULL) {
12319 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012320 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012321 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012322 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012323 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012324 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012325 newDoc->dict = ctxt->dict;
12326 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012327 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012328 } else {
12329 ctxt->myDoc = oldctxt->myDoc;
12330 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012331 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012332 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012333 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12334 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012335 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012336 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012337 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012338 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012339 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012340 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012341 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012342 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012343 ctxt->myDoc->children = NULL;
12344 ctxt->myDoc->last = NULL;
12345 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012346 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012347 ctxt->instate = XML_PARSER_CONTENT;
12348 ctxt->depth = oldctxt->depth + 1;
12349
Daniel Veillard328f48c2002-11-15 15:24:34 +000012350 ctxt->validate = 0;
12351 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012352 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12353 /*
12354 * ID/IDREF registration will be done in xmlValidateElement below
12355 */
12356 ctxt->loadsubset |= XML_SKIP_IDS;
12357 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012358 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012359 ctxt->attsDefault = oldctxt->attsDefault;
12360 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012361
Daniel Veillard68e9e742002-11-16 15:35:11 +000012362 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012363 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012364 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012365 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012366 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012367 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012368 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012369 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012370 }
12371
12372 if (!ctxt->wellFormed) {
12373 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012374 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012375 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012376 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012377 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012378 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012379 }
12380
William M. Brack7b9154b2003-09-27 19:23:50 +000012381 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012382 xmlNodePtr cur;
12383
12384 /*
12385 * Return the newly created nodeset after unlinking it from
12386 * they pseudo parent.
12387 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012388 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012389 *lst = cur;
12390 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012391#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012392 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12393 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12394 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012395 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12396 oldctxt->myDoc, cur);
12397 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012398#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012399 cur->parent = NULL;
12400 cur = cur->next;
12401 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012402 ctxt->myDoc->children->children = NULL;
12403 }
12404 if (ctxt->myDoc != NULL) {
12405 xmlFreeNode(ctxt->myDoc->children);
12406 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012407 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012408 }
12409
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012410 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012411 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012412 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012413 ctxt->attsDefault = NULL;
12414 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012415 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012416 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012417 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012418 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012419
12420 return(ret);
12421}
12422
Daniel Veillard29b17482004-08-16 00:39:03 +000012423/**
12424 * xmlParseInNodeContext:
12425 * @node: the context node
12426 * @data: the input string
12427 * @datalen: the input string length in bytes
12428 * @options: a combination of xmlParserOption
12429 * @lst: the return value for the set of parsed nodes
12430 *
12431 * Parse a well-balanced chunk of an XML document
12432 * within the context (DTD, namespaces, etc ...) of the given node.
12433 *
12434 * The allowed sequence for the data is a Well Balanced Chunk defined by
12435 * the content production in the XML grammar:
12436 *
12437 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12438 *
12439 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12440 * error code otherwise
12441 */
12442xmlParserErrors
12443xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12444 int options, xmlNodePtr *lst) {
12445#ifdef SAX2
12446 xmlParserCtxtPtr ctxt;
12447 xmlDocPtr doc = NULL;
12448 xmlNodePtr fake, cur;
12449 int nsnr = 0;
12450
12451 xmlParserErrors ret = XML_ERR_OK;
12452
12453 /*
12454 * check all input parameters, grab the document
12455 */
12456 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12457 return(XML_ERR_INTERNAL_ERROR);
12458 switch (node->type) {
12459 case XML_ELEMENT_NODE:
12460 case XML_ATTRIBUTE_NODE:
12461 case XML_TEXT_NODE:
12462 case XML_CDATA_SECTION_NODE:
12463 case XML_ENTITY_REF_NODE:
12464 case XML_PI_NODE:
12465 case XML_COMMENT_NODE:
12466 case XML_DOCUMENT_NODE:
12467 case XML_HTML_DOCUMENT_NODE:
12468 break;
12469 default:
12470 return(XML_ERR_INTERNAL_ERROR);
12471
12472 }
12473 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12474 (node->type != XML_DOCUMENT_NODE) &&
12475 (node->type != XML_HTML_DOCUMENT_NODE))
12476 node = node->parent;
12477 if (node == NULL)
12478 return(XML_ERR_INTERNAL_ERROR);
12479 if (node->type == XML_ELEMENT_NODE)
12480 doc = node->doc;
12481 else
12482 doc = (xmlDocPtr) node;
12483 if (doc == NULL)
12484 return(XML_ERR_INTERNAL_ERROR);
12485
12486 /*
12487 * allocate a context and set-up everything not related to the
12488 * node position in the tree
12489 */
12490 if (doc->type == XML_DOCUMENT_NODE)
12491 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12492#ifdef LIBXML_HTML_ENABLED
12493 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12494 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12495#endif
12496 else
12497 return(XML_ERR_INTERNAL_ERROR);
12498
12499 if (ctxt == NULL)
12500 return(XML_ERR_NO_MEMORY);
12501 fake = xmlNewComment(NULL);
12502 if (fake == NULL) {
12503 xmlFreeParserCtxt(ctxt);
12504 return(XML_ERR_NO_MEMORY);
12505 }
12506 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012507
12508 /*
12509 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12510 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12511 * we must wait until the last moment to free the original one.
12512 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012513 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012514 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012515 xmlDictFree(ctxt->dict);
12516 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012517 } else
12518 options |= XML_PARSE_NODICT;
12519
Daniel Veillard37334572008-07-31 08:20:02 +000012520 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012521 xmlDetectSAX2(ctxt);
12522 ctxt->myDoc = doc;
12523
12524 if (node->type == XML_ELEMENT_NODE) {
12525 nodePush(ctxt, node);
12526 /*
12527 * initialize the SAX2 namespaces stack
12528 */
12529 cur = node;
12530 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12531 xmlNsPtr ns = cur->nsDef;
12532 const xmlChar *iprefix, *ihref;
12533
12534 while (ns != NULL) {
12535 if (ctxt->dict) {
12536 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12537 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12538 } else {
12539 iprefix = ns->prefix;
12540 ihref = ns->href;
12541 }
12542
12543 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12544 nsPush(ctxt, iprefix, ihref);
12545 nsnr++;
12546 }
12547 ns = ns->next;
12548 }
12549 cur = cur->parent;
12550 }
12551 ctxt->instate = XML_PARSER_CONTENT;
12552 }
12553
12554 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12555 /*
12556 * ID/IDREF registration will be done in xmlValidateElement below
12557 */
12558 ctxt->loadsubset |= XML_SKIP_IDS;
12559 }
12560
Daniel Veillard499cc922006-01-18 17:22:35 +000012561#ifdef LIBXML_HTML_ENABLED
12562 if (doc->type == XML_HTML_DOCUMENT_NODE)
12563 __htmlParseContent(ctxt);
12564 else
12565#endif
12566 xmlParseContent(ctxt);
12567
Daniel Veillard29b17482004-08-16 00:39:03 +000012568 nsPop(ctxt, nsnr);
12569 if ((RAW == '<') && (NXT(1) == '/')) {
12570 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12571 } else if (RAW != 0) {
12572 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12573 }
12574 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12575 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12576 ctxt->wellFormed = 0;
12577 }
12578
12579 if (!ctxt->wellFormed) {
12580 if (ctxt->errNo == 0)
12581 ret = XML_ERR_INTERNAL_ERROR;
12582 else
12583 ret = (xmlParserErrors)ctxt->errNo;
12584 } else {
12585 ret = XML_ERR_OK;
12586 }
12587
12588 /*
12589 * Return the newly created nodeset after unlinking it from
12590 * the pseudo sibling.
12591 */
12592
12593 cur = fake->next;
12594 fake->next = NULL;
12595 node->last = fake;
12596
12597 if (cur != NULL) {
12598 cur->prev = NULL;
12599 }
12600
12601 *lst = cur;
12602
12603 while (cur != NULL) {
12604 cur->parent = NULL;
12605 cur = cur->next;
12606 }
12607
12608 xmlUnlinkNode(fake);
12609 xmlFreeNode(fake);
12610
12611
12612 if (ret != XML_ERR_OK) {
12613 xmlFreeNodeList(*lst);
12614 *lst = NULL;
12615 }
William M. Brackc3f81342004-10-03 01:22:44 +000012616
William M. Brackb7b54de2004-10-06 16:38:01 +000012617 if (doc->dict != NULL)
12618 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012619 xmlFreeParserCtxt(ctxt);
12620
12621 return(ret);
12622#else /* !SAX2 */
12623 return(XML_ERR_INTERNAL_ERROR);
12624#endif
12625}
12626
Daniel Veillard81273902003-09-30 00:43:48 +000012627#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012628/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012629 * xmlParseBalancedChunkMemoryRecover:
12630 * @doc: the document the chunk pertains to
12631 * @sax: the SAX handler bloc (possibly NULL)
12632 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12633 * @depth: Used for loop detection, use 0
12634 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12635 * @lst: the return value for the set of parsed nodes
12636 * @recover: return nodes even if the data is broken (use 0)
12637 *
12638 *
12639 * Parse a well-balanced chunk of an XML document
12640 * called by the parser
12641 * The allowed sequence for the Well Balanced Chunk is the one defined by
12642 * the content production in the XML grammar:
12643 *
12644 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12645 *
12646 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12647 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012648 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012649 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012650 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12651 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012652 */
12653int
12654xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012655 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012656 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012657 xmlParserCtxtPtr ctxt;
12658 xmlDocPtr newDoc;
12659 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012660 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012661 int size;
12662 int ret = 0;
12663
12664 if (depth > 40) {
12665 return(XML_ERR_ENTITY_LOOP);
12666 }
12667
12668
Daniel Veillardcda96922001-08-21 10:56:31 +000012669 if (lst != NULL)
12670 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012671 if (string == NULL)
12672 return(-1);
12673
12674 size = xmlStrlen(string);
12675
12676 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12677 if (ctxt == NULL) return(-1);
12678 ctxt->userData = ctxt;
12679 if (sax != NULL) {
12680 oldsax = ctxt->sax;
12681 ctxt->sax = sax;
12682 if (user_data != NULL)
12683 ctxt->userData = user_data;
12684 }
12685 newDoc = xmlNewDoc(BAD_CAST "1.0");
12686 if (newDoc == NULL) {
12687 xmlFreeParserCtxt(ctxt);
12688 return(-1);
12689 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012690 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012691 if ((doc != NULL) && (doc->dict != NULL)) {
12692 xmlDictFree(ctxt->dict);
12693 ctxt->dict = doc->dict;
12694 xmlDictReference(ctxt->dict);
12695 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12696 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12697 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12698 ctxt->dictNames = 1;
12699 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012700 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012701 }
Owen Taylor3473f882001-02-23 17:55:21 +000012702 if (doc != NULL) {
12703 newDoc->intSubset = doc->intSubset;
12704 newDoc->extSubset = doc->extSubset;
12705 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012706 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12707 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012708 if (sax != NULL)
12709 ctxt->sax = oldsax;
12710 xmlFreeParserCtxt(ctxt);
12711 newDoc->intSubset = NULL;
12712 newDoc->extSubset = NULL;
12713 xmlFreeDoc(newDoc);
12714 return(-1);
12715 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012716 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12717 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012718 if (doc == NULL) {
12719 ctxt->myDoc = newDoc;
12720 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012721 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012722 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012723 /* Ensure that doc has XML spec namespace */
12724 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12725 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012726 }
12727 ctxt->instate = XML_PARSER_CONTENT;
12728 ctxt->depth = depth;
12729
12730 /*
12731 * Doing validity checking on chunk doesn't make sense
12732 */
12733 ctxt->validate = 0;
12734 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012735 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012736
Daniel Veillardb39bc392002-10-26 19:29:51 +000012737 if ( doc != NULL ){
12738 content = doc->children;
12739 doc->children = NULL;
12740 xmlParseContent(ctxt);
12741 doc->children = content;
12742 }
12743 else {
12744 xmlParseContent(ctxt);
12745 }
Owen Taylor3473f882001-02-23 17:55:21 +000012746 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012747 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012748 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012749 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012750 }
12751 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012752 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012753 }
12754
12755 if (!ctxt->wellFormed) {
12756 if (ctxt->errNo == 0)
12757 ret = 1;
12758 else
12759 ret = ctxt->errNo;
12760 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012761 ret = 0;
12762 }
12763
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012764 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12765 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012766
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012767 /*
12768 * Return the newly created nodeset after unlinking it from
12769 * they pseudo parent.
12770 */
12771 cur = newDoc->children->children;
12772 *lst = cur;
12773 while (cur != NULL) {
12774 xmlSetTreeDoc(cur, doc);
12775 cur->parent = NULL;
12776 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012777 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012778 newDoc->children->children = NULL;
12779 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012780
Owen Taylor3473f882001-02-23 17:55:21 +000012781 if (sax != NULL)
12782 ctxt->sax = oldsax;
12783 xmlFreeParserCtxt(ctxt);
12784 newDoc->intSubset = NULL;
12785 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012786 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012787 xmlFreeDoc(newDoc);
12788
12789 return(ret);
12790}
12791
12792/**
12793 * xmlSAXParseEntity:
12794 * @sax: the SAX handler block
12795 * @filename: the filename
12796 *
12797 * parse an XML external entity out of context and build a tree.
12798 * It use the given SAX function block to handle the parsing callback.
12799 * If sax is NULL, fallback to the default DOM tree building routines.
12800 *
12801 * [78] extParsedEnt ::= TextDecl? content
12802 *
12803 * This correspond to a "Well Balanced" chunk
12804 *
12805 * Returns the resulting document tree
12806 */
12807
12808xmlDocPtr
12809xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12810 xmlDocPtr ret;
12811 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012812
12813 ctxt = xmlCreateFileParserCtxt(filename);
12814 if (ctxt == NULL) {
12815 return(NULL);
12816 }
12817 if (sax != NULL) {
12818 if (ctxt->sax != NULL)
12819 xmlFree(ctxt->sax);
12820 ctxt->sax = sax;
12821 ctxt->userData = NULL;
12822 }
12823
Owen Taylor3473f882001-02-23 17:55:21 +000012824 xmlParseExtParsedEnt(ctxt);
12825
12826 if (ctxt->wellFormed)
12827 ret = ctxt->myDoc;
12828 else {
12829 ret = NULL;
12830 xmlFreeDoc(ctxt->myDoc);
12831 ctxt->myDoc = NULL;
12832 }
12833 if (sax != NULL)
12834 ctxt->sax = NULL;
12835 xmlFreeParserCtxt(ctxt);
12836
12837 return(ret);
12838}
12839
12840/**
12841 * xmlParseEntity:
12842 * @filename: the filename
12843 *
12844 * parse an XML external entity out of context and build a tree.
12845 *
12846 * [78] extParsedEnt ::= TextDecl? content
12847 *
12848 * This correspond to a "Well Balanced" chunk
12849 *
12850 * Returns the resulting document tree
12851 */
12852
12853xmlDocPtr
12854xmlParseEntity(const char *filename) {
12855 return(xmlSAXParseEntity(NULL, filename));
12856}
Daniel Veillard81273902003-09-30 00:43:48 +000012857#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012858
12859/**
12860 * xmlCreateEntityParserCtxt:
12861 * @URL: the entity URL
12862 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012863 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012864 *
12865 * Create a parser context for an external entity
12866 * Automatic support for ZLIB/Compress compressed document is provided
12867 * by default if found at compile-time.
12868 *
12869 * Returns the new parser context or NULL
12870 */
12871xmlParserCtxtPtr
12872xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12873 const xmlChar *base) {
12874 xmlParserCtxtPtr ctxt;
12875 xmlParserInputPtr inputStream;
12876 char *directory = NULL;
12877 xmlChar *uri;
12878
12879 ctxt = xmlNewParserCtxt();
12880 if (ctxt == NULL) {
12881 return(NULL);
12882 }
12883
12884 uri = xmlBuildURI(URL, base);
12885
12886 if (uri == NULL) {
12887 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12888 if (inputStream == NULL) {
12889 xmlFreeParserCtxt(ctxt);
12890 return(NULL);
12891 }
12892
12893 inputPush(ctxt, inputStream);
12894
12895 if ((ctxt->directory == NULL) && (directory == NULL))
12896 directory = xmlParserGetDirectory((char *)URL);
12897 if ((ctxt->directory == NULL) && (directory != NULL))
12898 ctxt->directory = directory;
12899 } else {
12900 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12901 if (inputStream == NULL) {
12902 xmlFree(uri);
12903 xmlFreeParserCtxt(ctxt);
12904 return(NULL);
12905 }
12906
12907 inputPush(ctxt, inputStream);
12908
12909 if ((ctxt->directory == NULL) && (directory == NULL))
12910 directory = xmlParserGetDirectory((char *)uri);
12911 if ((ctxt->directory == NULL) && (directory != NULL))
12912 ctxt->directory = directory;
12913 xmlFree(uri);
12914 }
Owen Taylor3473f882001-02-23 17:55:21 +000012915 return(ctxt);
12916}
12917
12918/************************************************************************
12919 * *
12920 * Front ends when parsing from a file *
12921 * *
12922 ************************************************************************/
12923
12924/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012925 * xmlCreateURLParserCtxt:
12926 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012927 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012928 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012929 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012930 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012931 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012932 *
12933 * Returns the new parser context or NULL
12934 */
12935xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012936xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012937{
12938 xmlParserCtxtPtr ctxt;
12939 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012940 char *directory = NULL;
12941
Owen Taylor3473f882001-02-23 17:55:21 +000012942 ctxt = xmlNewParserCtxt();
12943 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012944 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012945 return(NULL);
12946 }
12947
Daniel Veillarddf292f72005-01-16 19:00:15 +000012948 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000012949 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000012950 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000012951
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012952 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012953 if (inputStream == NULL) {
12954 xmlFreeParserCtxt(ctxt);
12955 return(NULL);
12956 }
12957
Owen Taylor3473f882001-02-23 17:55:21 +000012958 inputPush(ctxt, inputStream);
12959 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012960 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012961 if ((ctxt->directory == NULL) && (directory != NULL))
12962 ctxt->directory = directory;
12963
12964 return(ctxt);
12965}
12966
Daniel Veillard61b93382003-11-03 14:28:31 +000012967/**
12968 * xmlCreateFileParserCtxt:
12969 * @filename: the filename
12970 *
12971 * Create a parser context for a file content.
12972 * Automatic support for ZLIB/Compress compressed document is provided
12973 * by default if found at compile-time.
12974 *
12975 * Returns the new parser context or NULL
12976 */
12977xmlParserCtxtPtr
12978xmlCreateFileParserCtxt(const char *filename)
12979{
12980 return(xmlCreateURLParserCtxt(filename, 0));
12981}
12982
Daniel Veillard81273902003-09-30 00:43:48 +000012983#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012984/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012985 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012986 * @sax: the SAX handler block
12987 * @filename: the filename
12988 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12989 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012990 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012991 *
12992 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12993 * compressed document is provided by default if found at compile-time.
12994 * It use the given SAX function block to handle the parsing callback.
12995 * If sax is NULL, fallback to the default DOM tree building routines.
12996 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012997 * User data (void *) is stored within the parser context in the
12998 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012999 *
Owen Taylor3473f882001-02-23 17:55:21 +000013000 * Returns the resulting document tree
13001 */
13002
13003xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013004xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13005 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013006 xmlDocPtr ret;
13007 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013008
Daniel Veillard635ef722001-10-29 11:48:19 +000013009 xmlInitParser();
13010
Owen Taylor3473f882001-02-23 17:55:21 +000013011 ctxt = xmlCreateFileParserCtxt(filename);
13012 if (ctxt == NULL) {
13013 return(NULL);
13014 }
13015 if (sax != NULL) {
13016 if (ctxt->sax != NULL)
13017 xmlFree(ctxt->sax);
13018 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013019 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013020 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013021 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013022 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013023 }
Owen Taylor3473f882001-02-23 17:55:21 +000013024
Daniel Veillard37d2d162008-03-14 10:54:00 +000013025 if (ctxt->directory == NULL)
13026 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013027
Daniel Veillarddad3f682002-11-17 16:47:27 +000013028 ctxt->recovery = recovery;
13029
Owen Taylor3473f882001-02-23 17:55:21 +000013030 xmlParseDocument(ctxt);
13031
William M. Brackc07329e2003-09-08 01:57:30 +000013032 if ((ctxt->wellFormed) || recovery) {
13033 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013034 if (ret != NULL) {
13035 if (ctxt->input->buf->compressed > 0)
13036 ret->compression = 9;
13037 else
13038 ret->compression = ctxt->input->buf->compressed;
13039 }
William M. Brackc07329e2003-09-08 01:57:30 +000013040 }
Owen Taylor3473f882001-02-23 17:55:21 +000013041 else {
13042 ret = NULL;
13043 xmlFreeDoc(ctxt->myDoc);
13044 ctxt->myDoc = NULL;
13045 }
13046 if (sax != NULL)
13047 ctxt->sax = NULL;
13048 xmlFreeParserCtxt(ctxt);
13049
13050 return(ret);
13051}
13052
13053/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013054 * xmlSAXParseFile:
13055 * @sax: the SAX handler block
13056 * @filename: the filename
13057 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13058 * documents
13059 *
13060 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13061 * compressed document is provided by default if found at compile-time.
13062 * It use the given SAX function block to handle the parsing callback.
13063 * If sax is NULL, fallback to the default DOM tree building routines.
13064 *
13065 * Returns the resulting document tree
13066 */
13067
13068xmlDocPtr
13069xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13070 int recovery) {
13071 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13072}
13073
13074/**
Owen Taylor3473f882001-02-23 17:55:21 +000013075 * xmlRecoverDoc:
13076 * @cur: a pointer to an array of xmlChar
13077 *
13078 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013079 * In the case the document is not Well Formed, a attempt to build a
13080 * tree is tried anyway
13081 *
13082 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013083 */
13084
13085xmlDocPtr
13086xmlRecoverDoc(xmlChar *cur) {
13087 return(xmlSAXParseDoc(NULL, cur, 1));
13088}
13089
13090/**
13091 * xmlParseFile:
13092 * @filename: the filename
13093 *
13094 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13095 * compressed document is provided by default if found at compile-time.
13096 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013097 * Returns the resulting document tree if the file was wellformed,
13098 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013099 */
13100
13101xmlDocPtr
13102xmlParseFile(const char *filename) {
13103 return(xmlSAXParseFile(NULL, filename, 0));
13104}
13105
13106/**
13107 * xmlRecoverFile:
13108 * @filename: the filename
13109 *
13110 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13111 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013112 * In the case the document is not Well Formed, it attempts to build
13113 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013114 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013115 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013116 */
13117
13118xmlDocPtr
13119xmlRecoverFile(const char *filename) {
13120 return(xmlSAXParseFile(NULL, filename, 1));
13121}
13122
13123
13124/**
13125 * xmlSetupParserForBuffer:
13126 * @ctxt: an XML parser context
13127 * @buffer: a xmlChar * buffer
13128 * @filename: a file name
13129 *
13130 * Setup the parser context to parse a new buffer; Clears any prior
13131 * contents from the parser context. The buffer parameter must not be
13132 * NULL, but the filename parameter can be
13133 */
13134void
13135xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13136 const char* filename)
13137{
13138 xmlParserInputPtr input;
13139
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013140 if ((ctxt == NULL) || (buffer == NULL))
13141 return;
13142
Owen Taylor3473f882001-02-23 17:55:21 +000013143 input = xmlNewInputStream(ctxt);
13144 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013145 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013146 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013147 return;
13148 }
13149
13150 xmlClearParserCtxt(ctxt);
13151 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013152 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013153 input->base = buffer;
13154 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013155 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013156 inputPush(ctxt, input);
13157}
13158
13159/**
13160 * xmlSAXUserParseFile:
13161 * @sax: a SAX handler
13162 * @user_data: The user data returned on SAX callbacks
13163 * @filename: a file name
13164 *
13165 * parse an XML file and call the given SAX handler routines.
13166 * Automatic support for ZLIB/Compress compressed document is provided
13167 *
13168 * Returns 0 in case of success or a error number otherwise
13169 */
13170int
13171xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13172 const char *filename) {
13173 int ret = 0;
13174 xmlParserCtxtPtr ctxt;
13175
13176 ctxt = xmlCreateFileParserCtxt(filename);
13177 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013178 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013179 xmlFree(ctxt->sax);
13180 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013181 xmlDetectSAX2(ctxt);
13182
Owen Taylor3473f882001-02-23 17:55:21 +000013183 if (user_data != NULL)
13184 ctxt->userData = user_data;
13185
13186 xmlParseDocument(ctxt);
13187
13188 if (ctxt->wellFormed)
13189 ret = 0;
13190 else {
13191 if (ctxt->errNo != 0)
13192 ret = ctxt->errNo;
13193 else
13194 ret = -1;
13195 }
13196 if (sax != NULL)
13197 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013198 if (ctxt->myDoc != NULL) {
13199 xmlFreeDoc(ctxt->myDoc);
13200 ctxt->myDoc = NULL;
13201 }
Owen Taylor3473f882001-02-23 17:55:21 +000013202 xmlFreeParserCtxt(ctxt);
13203
13204 return ret;
13205}
Daniel Veillard81273902003-09-30 00:43:48 +000013206#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013207
13208/************************************************************************
13209 * *
13210 * Front ends when parsing from memory *
13211 * *
13212 ************************************************************************/
13213
13214/**
13215 * xmlCreateMemoryParserCtxt:
13216 * @buffer: a pointer to a char array
13217 * @size: the size of the array
13218 *
13219 * Create a parser context for an XML in-memory document.
13220 *
13221 * Returns the new parser context or NULL
13222 */
13223xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013224xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013225 xmlParserCtxtPtr ctxt;
13226 xmlParserInputPtr input;
13227 xmlParserInputBufferPtr buf;
13228
13229 if (buffer == NULL)
13230 return(NULL);
13231 if (size <= 0)
13232 return(NULL);
13233
13234 ctxt = xmlNewParserCtxt();
13235 if (ctxt == NULL)
13236 return(NULL);
13237
Daniel Veillard53350552003-09-18 13:35:51 +000013238 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013239 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013240 if (buf == NULL) {
13241 xmlFreeParserCtxt(ctxt);
13242 return(NULL);
13243 }
Owen Taylor3473f882001-02-23 17:55:21 +000013244
13245 input = xmlNewInputStream(ctxt);
13246 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013247 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013248 xmlFreeParserCtxt(ctxt);
13249 return(NULL);
13250 }
13251
13252 input->filename = NULL;
13253 input->buf = buf;
13254 input->base = input->buf->buffer->content;
13255 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013256 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013257
13258 inputPush(ctxt, input);
13259 return(ctxt);
13260}
13261
Daniel Veillard81273902003-09-30 00:43:48 +000013262#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013263/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013264 * xmlSAXParseMemoryWithData:
13265 * @sax: the SAX handler block
13266 * @buffer: an pointer to a char array
13267 * @size: the size of the array
13268 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13269 * documents
13270 * @data: the userdata
13271 *
13272 * parse an XML in-memory block and use the given SAX function block
13273 * to handle the parsing callback. If sax is NULL, fallback to the default
13274 * DOM tree building routines.
13275 *
13276 * User data (void *) is stored within the parser context in the
13277 * context's _private member, so it is available nearly everywhere in libxml
13278 *
13279 * Returns the resulting document tree
13280 */
13281
13282xmlDocPtr
13283xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13284 int size, int recovery, void *data) {
13285 xmlDocPtr ret;
13286 xmlParserCtxtPtr ctxt;
13287
13288 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13289 if (ctxt == NULL) return(NULL);
13290 if (sax != NULL) {
13291 if (ctxt->sax != NULL)
13292 xmlFree(ctxt->sax);
13293 ctxt->sax = sax;
13294 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013295 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013296 if (data!=NULL) {
13297 ctxt->_private=data;
13298 }
13299
Daniel Veillardadba5f12003-04-04 16:09:01 +000013300 ctxt->recovery = recovery;
13301
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013302 xmlParseDocument(ctxt);
13303
13304 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13305 else {
13306 ret = NULL;
13307 xmlFreeDoc(ctxt->myDoc);
13308 ctxt->myDoc = NULL;
13309 }
13310 if (sax != NULL)
13311 ctxt->sax = NULL;
13312 xmlFreeParserCtxt(ctxt);
13313
13314 return(ret);
13315}
13316
13317/**
Owen Taylor3473f882001-02-23 17:55:21 +000013318 * xmlSAXParseMemory:
13319 * @sax: the SAX handler block
13320 * @buffer: an pointer to a char array
13321 * @size: the size of the array
13322 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13323 * documents
13324 *
13325 * parse an XML in-memory block and use the given SAX function block
13326 * to handle the parsing callback. If sax is NULL, fallback to the default
13327 * DOM tree building routines.
13328 *
13329 * Returns the resulting document tree
13330 */
13331xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013332xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13333 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013334 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013335}
13336
13337/**
13338 * xmlParseMemory:
13339 * @buffer: an pointer to a char array
13340 * @size: the size of the array
13341 *
13342 * parse an XML in-memory block and build a tree.
13343 *
13344 * Returns the resulting document tree
13345 */
13346
Daniel Veillard50822cb2001-07-26 20:05:51 +000013347xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013348 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13349}
13350
13351/**
13352 * xmlRecoverMemory:
13353 * @buffer: an pointer to a char array
13354 * @size: the size of the array
13355 *
13356 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013357 * In the case the document is not Well Formed, an attempt to
13358 * build a tree is tried anyway
13359 *
13360 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013361 */
13362
Daniel Veillard50822cb2001-07-26 20:05:51 +000013363xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013364 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13365}
13366
13367/**
13368 * xmlSAXUserParseMemory:
13369 * @sax: a SAX handler
13370 * @user_data: The user data returned on SAX callbacks
13371 * @buffer: an in-memory XML document input
13372 * @size: the length of the XML document in bytes
13373 *
13374 * A better SAX parsing routine.
13375 * parse an XML in-memory buffer and call the given SAX handler routines.
13376 *
13377 * Returns 0 in case of success or a error number otherwise
13378 */
13379int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013380 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013381 int ret = 0;
13382 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013383
13384 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13385 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013386 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13387 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013388 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013389 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013390
Daniel Veillard30211a02001-04-26 09:33:18 +000013391 if (user_data != NULL)
13392 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013393
13394 xmlParseDocument(ctxt);
13395
13396 if (ctxt->wellFormed)
13397 ret = 0;
13398 else {
13399 if (ctxt->errNo != 0)
13400 ret = ctxt->errNo;
13401 else
13402 ret = -1;
13403 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013404 if (sax != NULL)
13405 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013406 if (ctxt->myDoc != NULL) {
13407 xmlFreeDoc(ctxt->myDoc);
13408 ctxt->myDoc = NULL;
13409 }
Owen Taylor3473f882001-02-23 17:55:21 +000013410 xmlFreeParserCtxt(ctxt);
13411
13412 return ret;
13413}
Daniel Veillard81273902003-09-30 00:43:48 +000013414#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013415
13416/**
13417 * xmlCreateDocParserCtxt:
13418 * @cur: a pointer to an array of xmlChar
13419 *
13420 * Creates a parser context for an XML in-memory document.
13421 *
13422 * Returns the new parser context or NULL
13423 */
13424xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013425xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013426 int len;
13427
13428 if (cur == NULL)
13429 return(NULL);
13430 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013431 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013432}
13433
Daniel Veillard81273902003-09-30 00:43:48 +000013434#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013435/**
13436 * xmlSAXParseDoc:
13437 * @sax: the SAX handler block
13438 * @cur: a pointer to an array of xmlChar
13439 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13440 * documents
13441 *
13442 * parse an XML in-memory document and build a tree.
13443 * It use the given SAX function block to handle the parsing callback.
13444 * If sax is NULL, fallback to the default DOM tree building routines.
13445 *
13446 * Returns the resulting document tree
13447 */
13448
13449xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013450xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013451 xmlDocPtr ret;
13452 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013453 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013454
Daniel Veillard38936062004-11-04 17:45:11 +000013455 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013456
13457
13458 ctxt = xmlCreateDocParserCtxt(cur);
13459 if (ctxt == NULL) return(NULL);
13460 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013461 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013462 ctxt->sax = sax;
13463 ctxt->userData = NULL;
13464 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013465 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013466
13467 xmlParseDocument(ctxt);
13468 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13469 else {
13470 ret = NULL;
13471 xmlFreeDoc(ctxt->myDoc);
13472 ctxt->myDoc = NULL;
13473 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013474 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013475 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013476 xmlFreeParserCtxt(ctxt);
13477
13478 return(ret);
13479}
13480
13481/**
13482 * xmlParseDoc:
13483 * @cur: a pointer to an array of xmlChar
13484 *
13485 * parse an XML in-memory document and build a tree.
13486 *
13487 * Returns the resulting document tree
13488 */
13489
13490xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013491xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013492 return(xmlSAXParseDoc(NULL, cur, 0));
13493}
Daniel Veillard81273902003-09-30 00:43:48 +000013494#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013495
Daniel Veillard81273902003-09-30 00:43:48 +000013496#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013497/************************************************************************
13498 * *
13499 * Specific function to keep track of entities references *
13500 * and used by the XSLT debugger *
13501 * *
13502 ************************************************************************/
13503
13504static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13505
13506/**
13507 * xmlAddEntityReference:
13508 * @ent : A valid entity
13509 * @firstNode : A valid first node for children of entity
13510 * @lastNode : A valid last node of children entity
13511 *
13512 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13513 */
13514static void
13515xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13516 xmlNodePtr lastNode)
13517{
13518 if (xmlEntityRefFunc != NULL) {
13519 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13520 }
13521}
13522
13523
13524/**
13525 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013526 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013527 *
13528 * Set the function to call call back when a xml reference has been made
13529 */
13530void
13531xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13532{
13533 xmlEntityRefFunc = func;
13534}
Daniel Veillard81273902003-09-30 00:43:48 +000013535#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013536
13537/************************************************************************
13538 * *
13539 * Miscellaneous *
13540 * *
13541 ************************************************************************/
13542
13543#ifdef LIBXML_XPATH_ENABLED
13544#include <libxml/xpath.h>
13545#endif
13546
Daniel Veillardffa3c742005-07-21 13:24:09 +000013547extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013548static int xmlParserInitialized = 0;
13549
13550/**
13551 * xmlInitParser:
13552 *
13553 * Initialization function for the XML parser.
13554 * This is not reentrant. Call once before processing in case of
13555 * use in multithreaded programs.
13556 */
13557
13558void
13559xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013560 if (xmlParserInitialized != 0)
13561 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013562
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013563#ifdef LIBXML_THREAD_ENABLED
13564 __xmlGlobalInitMutexLock();
13565 if (xmlParserInitialized == 0) {
13566#endif
13567 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13568 (xmlGenericError == NULL))
13569 initGenericErrorDefaultFunc(NULL);
13570 xmlInitGlobals();
13571 xmlInitThreads();
13572 xmlInitMemory();
13573 xmlInitCharEncodingHandlers();
13574 xmlDefaultSAXHandlerInit();
13575 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013576#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013577 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013578#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013579#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013580 htmlInitAutoClose();
13581 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013582#endif
13583#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013584 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013585#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013586 xmlParserInitialized = 1;
13587#ifdef LIBXML_THREAD_ENABLED
13588 }
13589 __xmlGlobalInitMutexUnlock();
13590#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013591}
13592
13593/**
13594 * xmlCleanupParser:
13595 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013596 * This function name is somewhat misleading. It does not clean up
13597 * parser state, it cleans up memory allocated by the library itself.
13598 * It is a cleanup function for the XML library. It tries to reclaim all
13599 * related global memory allocated for the library processing.
13600 * It doesn't deallocate any document related memory. One should
13601 * call xmlCleanupParser() only when the process has finished using
13602 * the library and all XML/HTML documents built with it.
13603 * See also xmlInitParser() which has the opposite function of preparing
13604 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013605 */
13606
13607void
13608xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013609 if (!xmlParserInitialized)
13610 return;
13611
Owen Taylor3473f882001-02-23 17:55:21 +000013612 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013613#ifdef LIBXML_CATALOG_ENABLED
13614 xmlCatalogCleanup();
13615#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013616 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013617 xmlCleanupInputCallbacks();
13618#ifdef LIBXML_OUTPUT_ENABLED
13619 xmlCleanupOutputCallbacks();
13620#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013621#ifdef LIBXML_SCHEMAS_ENABLED
13622 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013623 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013624#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013625 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013626 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013627 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013628 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013629 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013630}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013631
13632/************************************************************************
13633 * *
13634 * New set (2.6.0) of simpler and more flexible APIs *
13635 * *
13636 ************************************************************************/
13637
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * use. @str is evaluated several times, so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is
 * exactly one statement and can be used safely as the body of an
 * if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
13649
13650/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013651 * xmlCtxtReset:
13652 * @ctxt: an XML parser context
13653 *
13654 * Reset a parser context
13655 */
13656void
13657xmlCtxtReset(xmlParserCtxtPtr ctxt)
13658{
13659 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013660 xmlDictPtr dict;
13661
13662 if (ctxt == NULL)
13663 return;
13664
13665 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013666
13667 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13668 xmlFreeInputStream(input);
13669 }
13670 ctxt->inputNr = 0;
13671 ctxt->input = NULL;
13672
13673 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013674 if (ctxt->spaceTab != NULL) {
13675 ctxt->spaceTab[0] = -1;
13676 ctxt->space = &ctxt->spaceTab[0];
13677 } else {
13678 ctxt->space = NULL;
13679 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013680
13681
13682 ctxt->nodeNr = 0;
13683 ctxt->node = NULL;
13684
13685 ctxt->nameNr = 0;
13686 ctxt->name = NULL;
13687
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013688 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013689 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013690 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013691 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013692 DICT_FREE(ctxt->directory);
13693 ctxt->directory = NULL;
13694 DICT_FREE(ctxt->extSubURI);
13695 ctxt->extSubURI = NULL;
13696 DICT_FREE(ctxt->extSubSystem);
13697 ctxt->extSubSystem = NULL;
13698 if (ctxt->myDoc != NULL)
13699 xmlFreeDoc(ctxt->myDoc);
13700 ctxt->myDoc = NULL;
13701
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013702 ctxt->standalone = -1;
13703 ctxt->hasExternalSubset = 0;
13704 ctxt->hasPErefs = 0;
13705 ctxt->html = 0;
13706 ctxt->external = 0;
13707 ctxt->instate = XML_PARSER_START;
13708 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013709
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013710 ctxt->wellFormed = 1;
13711 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013712 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013713 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013714#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013715 ctxt->vctxt.userData = ctxt;
13716 ctxt->vctxt.error = xmlParserValidityError;
13717 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013718#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013719 ctxt->record_info = 0;
13720 ctxt->nbChars = 0;
13721 ctxt->checkIndex = 0;
13722 ctxt->inSubset = 0;
13723 ctxt->errNo = XML_ERR_OK;
13724 ctxt->depth = 0;
13725 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13726 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013727 ctxt->nbentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013728 xmlInitNodeInfoSeq(&ctxt->node_seq);
13729
13730 if (ctxt->attsDefault != NULL) {
13731 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13732 ctxt->attsDefault = NULL;
13733 }
13734 if (ctxt->attsSpecial != NULL) {
13735 xmlHashFree(ctxt->attsSpecial, NULL);
13736 ctxt->attsSpecial = NULL;
13737 }
13738
Daniel Veillard4432df22003-09-28 18:58:27 +000013739#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013740 if (ctxt->catalogs != NULL)
13741 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013742#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013743 if (ctxt->lastError.code != XML_ERR_OK)
13744 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013745}
13746
13747/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013748 * xmlCtxtResetPush:
13749 * @ctxt: an XML parser context
13750 * @chunk: a pointer to an array of chars
13751 * @size: number of chars in the array
13752 * @filename: an optional file name or URI
13753 * @encoding: the document encoding, or NULL
13754 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013755 * Reset a push parser context
13756 *
13757 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013758 */
13759int
13760xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13761 int size, const char *filename, const char *encoding)
13762{
13763 xmlParserInputPtr inputStream;
13764 xmlParserInputBufferPtr buf;
13765 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13766
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013767 if (ctxt == NULL)
13768 return(1);
13769
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013770 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13771 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13772
13773 buf = xmlAllocParserInputBuffer(enc);
13774 if (buf == NULL)
13775 return(1);
13776
13777 if (ctxt == NULL) {
13778 xmlFreeParserInputBuffer(buf);
13779 return(1);
13780 }
13781
13782 xmlCtxtReset(ctxt);
13783
13784 if (ctxt->pushTab == NULL) {
13785 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13786 sizeof(xmlChar *));
13787 if (ctxt->pushTab == NULL) {
13788 xmlErrMemory(ctxt, NULL);
13789 xmlFreeParserInputBuffer(buf);
13790 return(1);
13791 }
13792 }
13793
13794 if (filename == NULL) {
13795 ctxt->directory = NULL;
13796 } else {
13797 ctxt->directory = xmlParserGetDirectory(filename);
13798 }
13799
13800 inputStream = xmlNewInputStream(ctxt);
13801 if (inputStream == NULL) {
13802 xmlFreeParserInputBuffer(buf);
13803 return(1);
13804 }
13805
13806 if (filename == NULL)
13807 inputStream->filename = NULL;
13808 else
13809 inputStream->filename = (char *)
13810 xmlCanonicPath((const xmlChar *) filename);
13811 inputStream->buf = buf;
13812 inputStream->base = inputStream->buf->buffer->content;
13813 inputStream->cur = inputStream->buf->buffer->content;
13814 inputStream->end =
13815 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13816
13817 inputPush(ctxt, inputStream);
13818
13819 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13820 (ctxt->input->buf != NULL)) {
13821 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13822 int cur = ctxt->input->cur - ctxt->input->base;
13823
13824 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13825
13826 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13827 ctxt->input->cur = ctxt->input->base + cur;
13828 ctxt->input->end =
13829 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13830 use];
13831#ifdef DEBUG_PUSH
13832 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13833#endif
13834 }
13835
13836 if (encoding != NULL) {
13837 xmlCharEncodingHandlerPtr hdlr;
13838
Daniel Veillard37334572008-07-31 08:20:02 +000013839 if (ctxt->encoding != NULL)
13840 xmlFree((xmlChar *) ctxt->encoding);
13841 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13842
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013843 hdlr = xmlFindCharEncodingHandler(encoding);
13844 if (hdlr != NULL) {
13845 xmlSwitchToEncoding(ctxt, hdlr);
13846 } else {
13847 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13848 "Unsupported encoding %s\n", BAD_CAST encoding);
13849 }
13850 } else if (enc != XML_CHAR_ENCODING_NONE) {
13851 xmlSwitchEncoding(ctxt, enc);
13852 }
13853
13854 return(0);
13855}
13856
Daniel Veillard37334572008-07-31 08:20:02 +000013857
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013858/**
Daniel Veillard37334572008-07-31 08:20:02 +000013859 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013860 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013861 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000013862 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013863 *
13864 * Applies the options to the parser context
13865 *
13866 * Returns 0 in case of success, the set of unknown or unimplemented options
13867 * in case of error.
13868 */
Daniel Veillard37334572008-07-31 08:20:02 +000013869static int
13870xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013871{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013872 if (ctxt == NULL)
13873 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000013874 if (encoding != NULL) {
13875 if (ctxt->encoding != NULL)
13876 xmlFree((xmlChar *) ctxt->encoding);
13877 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13878 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013879 if (options & XML_PARSE_RECOVER) {
13880 ctxt->recovery = 1;
13881 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013882 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013883 } else
13884 ctxt->recovery = 0;
13885 if (options & XML_PARSE_DTDLOAD) {
13886 ctxt->loadsubset = XML_DETECT_IDS;
13887 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013888 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013889 } else
13890 ctxt->loadsubset = 0;
13891 if (options & XML_PARSE_DTDATTR) {
13892 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13893 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013894 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013895 }
13896 if (options & XML_PARSE_NOENT) {
13897 ctxt->replaceEntities = 1;
13898 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13899 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013900 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013901 } else
13902 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013903 if (options & XML_PARSE_PEDANTIC) {
13904 ctxt->pedantic = 1;
13905 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013906 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013907 } else
13908 ctxt->pedantic = 0;
13909 if (options & XML_PARSE_NOBLANKS) {
13910 ctxt->keepBlanks = 0;
13911 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13912 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013913 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013914 } else
13915 ctxt->keepBlanks = 1;
13916 if (options & XML_PARSE_DTDVALID) {
13917 ctxt->validate = 1;
13918 if (options & XML_PARSE_NOWARNING)
13919 ctxt->vctxt.warning = NULL;
13920 if (options & XML_PARSE_NOERROR)
13921 ctxt->vctxt.error = NULL;
13922 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013923 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013924 } else
13925 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013926 if (options & XML_PARSE_NOWARNING) {
13927 ctxt->sax->warning = NULL;
13928 options -= XML_PARSE_NOWARNING;
13929 }
13930 if (options & XML_PARSE_NOERROR) {
13931 ctxt->sax->error = NULL;
13932 ctxt->sax->fatalError = NULL;
13933 options -= XML_PARSE_NOERROR;
13934 }
Daniel Veillard81273902003-09-30 00:43:48 +000013935#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013936 if (options & XML_PARSE_SAX1) {
13937 ctxt->sax->startElement = xmlSAX2StartElement;
13938 ctxt->sax->endElement = xmlSAX2EndElement;
13939 ctxt->sax->startElementNs = NULL;
13940 ctxt->sax->endElementNs = NULL;
13941 ctxt->sax->initialized = 1;
13942 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013943 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013944 }
Daniel Veillard81273902003-09-30 00:43:48 +000013945#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013946 if (options & XML_PARSE_NODICT) {
13947 ctxt->dictNames = 0;
13948 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013949 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013950 } else {
13951 ctxt->dictNames = 1;
13952 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013953 if (options & XML_PARSE_NOCDATA) {
13954 ctxt->sax->cdataBlock = NULL;
13955 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013956 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013957 }
13958 if (options & XML_PARSE_NSCLEAN) {
13959 ctxt->options |= XML_PARSE_NSCLEAN;
13960 options -= XML_PARSE_NSCLEAN;
13961 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013962 if (options & XML_PARSE_NONET) {
13963 ctxt->options |= XML_PARSE_NONET;
13964 options -= XML_PARSE_NONET;
13965 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013966 if (options & XML_PARSE_COMPACT) {
13967 ctxt->options |= XML_PARSE_COMPACT;
13968 options -= XML_PARSE_COMPACT;
13969 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000013970 if (options & XML_PARSE_OLD10) {
13971 ctxt->options |= XML_PARSE_OLD10;
13972 options -= XML_PARSE_OLD10;
13973 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013974 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013975 return (options);
13976}
13977
13978/**
Daniel Veillard37334572008-07-31 08:20:02 +000013979 * xmlCtxtUseOptions:
13980 * @ctxt: an XML parser context
13981 * @options: a combination of xmlParserOption
13982 *
13983 * Applies the options to the parser context
13984 *
13985 * Returns 0 in case of success, the set of unknown or unimplemented options
13986 * in case of error.
13987 */
13988int
13989xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13990{
13991 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
13992}
13993
13994/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013995 * xmlDoRead:
13996 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013997 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013998 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013999 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014000 * @reuse: keep the context for reuse
14001 *
14002 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014003 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014004 * Returns the resulting document tree or NULL
14005 */
14006static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014007xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14008 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014009{
14010 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014011
14012 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014013 if (encoding != NULL) {
14014 xmlCharEncodingHandlerPtr hdlr;
14015
14016 hdlr = xmlFindCharEncodingHandler(encoding);
14017 if (hdlr != NULL)
14018 xmlSwitchToEncoding(ctxt, hdlr);
14019 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014020 if ((URL != NULL) && (ctxt->input != NULL) &&
14021 (ctxt->input->filename == NULL))
14022 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014023 xmlParseDocument(ctxt);
14024 if ((ctxt->wellFormed) || ctxt->recovery)
14025 ret = ctxt->myDoc;
14026 else {
14027 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014028 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014029 xmlFreeDoc(ctxt->myDoc);
14030 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014031 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014032 ctxt->myDoc = NULL;
14033 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014034 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014035 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014036
14037 return (ret);
14038}
14039
14040/**
14041 * xmlReadDoc:
14042 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014043 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014044 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014045 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014046 *
14047 * parse an XML in-memory document and build a tree.
14048 *
14049 * Returns the resulting document tree
14050 */
14051xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014052xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014053{
14054 xmlParserCtxtPtr ctxt;
14055
14056 if (cur == NULL)
14057 return (NULL);
14058
14059 ctxt = xmlCreateDocParserCtxt(cur);
14060 if (ctxt == NULL)
14061 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014062 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014063}
14064
14065/**
14066 * xmlReadFile:
14067 * @filename: a file or URL
14068 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014069 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014070 *
14071 * parse an XML file from the filesystem or the network.
14072 *
14073 * Returns the resulting document tree
14074 */
14075xmlDocPtr
14076xmlReadFile(const char *filename, const char *encoding, int options)
14077{
14078 xmlParserCtxtPtr ctxt;
14079
Daniel Veillard61b93382003-11-03 14:28:31 +000014080 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014081 if (ctxt == NULL)
14082 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014083 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014084}
14085
14086/**
14087 * xmlReadMemory:
14088 * @buffer: a pointer to a char array
14089 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014090 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014091 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014092 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014093 *
14094 * parse an XML in-memory document and build a tree.
14095 *
14096 * Returns the resulting document tree
14097 */
14098xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014099xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014100{
14101 xmlParserCtxtPtr ctxt;
14102
14103 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14104 if (ctxt == NULL)
14105 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014106 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014107}
14108
14109/**
14110 * xmlReadFd:
14111 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014112 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014113 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014114 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014115 *
14116 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014117 * NOTE that the file descriptor will not be closed when the
14118 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014119 *
14120 * Returns the resulting document tree
14121 */
14122xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014123xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014124{
14125 xmlParserCtxtPtr ctxt;
14126 xmlParserInputBufferPtr input;
14127 xmlParserInputPtr stream;
14128
14129 if (fd < 0)
14130 return (NULL);
14131
14132 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14133 if (input == NULL)
14134 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014135 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014136 ctxt = xmlNewParserCtxt();
14137 if (ctxt == NULL) {
14138 xmlFreeParserInputBuffer(input);
14139 return (NULL);
14140 }
14141 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14142 if (stream == NULL) {
14143 xmlFreeParserInputBuffer(input);
14144 xmlFreeParserCtxt(ctxt);
14145 return (NULL);
14146 }
14147 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014148 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014149}
14150
14151/**
14152 * xmlReadIO:
14153 * @ioread: an I/O read function
14154 * @ioclose: an I/O close function
14155 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014156 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014157 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014158 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014159 *
14160 * parse an XML document from I/O functions and source and build a tree.
14161 *
14162 * Returns the resulting document tree
14163 */
14164xmlDocPtr
14165xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014166 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014167{
14168 xmlParserCtxtPtr ctxt;
14169 xmlParserInputBufferPtr input;
14170 xmlParserInputPtr stream;
14171
14172 if (ioread == NULL)
14173 return (NULL);
14174
14175 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14176 XML_CHAR_ENCODING_NONE);
14177 if (input == NULL)
14178 return (NULL);
14179 ctxt = xmlNewParserCtxt();
14180 if (ctxt == NULL) {
14181 xmlFreeParserInputBuffer(input);
14182 return (NULL);
14183 }
14184 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14185 if (stream == NULL) {
14186 xmlFreeParserInputBuffer(input);
14187 xmlFreeParserCtxt(ctxt);
14188 return (NULL);
14189 }
14190 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014191 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014192}
14193
14194/**
14195 * xmlCtxtReadDoc:
14196 * @ctxt: an XML parser context
14197 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014198 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014199 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014200 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014201 *
14202 * parse an XML in-memory document and build a tree.
14203 * This reuses the existing @ctxt parser context
14204 *
14205 * Returns the resulting document tree
14206 */
14207xmlDocPtr
14208xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014209 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014210{
14211 xmlParserInputPtr stream;
14212
14213 if (cur == NULL)
14214 return (NULL);
14215 if (ctxt == NULL)
14216 return (NULL);
14217
14218 xmlCtxtReset(ctxt);
14219
14220 stream = xmlNewStringInputStream(ctxt, cur);
14221 if (stream == NULL) {
14222 return (NULL);
14223 }
14224 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014225 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014226}
14227
14228/**
14229 * xmlCtxtReadFile:
14230 * @ctxt: an XML parser context
14231 * @filename: a file or URL
14232 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014233 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014234 *
14235 * parse an XML file from the filesystem or the network.
14236 * This reuses the existing @ctxt parser context
14237 *
14238 * Returns the resulting document tree
14239 */
14240xmlDocPtr
14241xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14242 const char *encoding, int options)
14243{
14244 xmlParserInputPtr stream;
14245
14246 if (filename == NULL)
14247 return (NULL);
14248 if (ctxt == NULL)
14249 return (NULL);
14250
14251 xmlCtxtReset(ctxt);
14252
Daniel Veillard29614c72004-11-26 10:47:26 +000014253 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014254 if (stream == NULL) {
14255 return (NULL);
14256 }
14257 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014258 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014259}
14260
14261/**
14262 * xmlCtxtReadMemory:
14263 * @ctxt: an XML parser context
14264 * @buffer: a pointer to a char array
14265 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014266 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014268 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014269 *
14270 * parse an XML in-memory document and build a tree.
14271 * This reuses the existing @ctxt parser context
14272 *
14273 * Returns the resulting document tree
14274 */
14275xmlDocPtr
14276xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014277 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014278{
14279 xmlParserInputBufferPtr input;
14280 xmlParserInputPtr stream;
14281
14282 if (ctxt == NULL)
14283 return (NULL);
14284 if (buffer == NULL)
14285 return (NULL);
14286
14287 xmlCtxtReset(ctxt);
14288
14289 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14290 if (input == NULL) {
14291 return(NULL);
14292 }
14293
14294 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14295 if (stream == NULL) {
14296 xmlFreeParserInputBuffer(input);
14297 return(NULL);
14298 }
14299
14300 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014301 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014302}
14303
14304/**
14305 * xmlCtxtReadFd:
14306 * @ctxt: an XML parser context
14307 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014308 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014309 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014310 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014311 *
14312 * parse an XML from a file descriptor and build a tree.
14313 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014314 * NOTE that the file descriptor will not be closed when the
14315 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014316 *
14317 * Returns the resulting document tree
14318 */
14319xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014320xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14321 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014322{
14323 xmlParserInputBufferPtr input;
14324 xmlParserInputPtr stream;
14325
14326 if (fd < 0)
14327 return (NULL);
14328 if (ctxt == NULL)
14329 return (NULL);
14330
14331 xmlCtxtReset(ctxt);
14332
14333
14334 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14335 if (input == NULL)
14336 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014337 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014338 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14339 if (stream == NULL) {
14340 xmlFreeParserInputBuffer(input);
14341 return (NULL);
14342 }
14343 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014344 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014345}
14346
14347/**
14348 * xmlCtxtReadIO:
14349 * @ctxt: an XML parser context
14350 * @ioread: an I/O read function
14351 * @ioclose: an I/O close function
14352 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014353 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014354 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014355 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014356 *
14357 * parse an XML document from I/O functions and source and build a tree.
14358 * This reuses the existing @ctxt parser context
14359 *
14360 * Returns the resulting document tree
14361 */
14362xmlDocPtr
14363xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14364 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014365 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014366 const char *encoding, int options)
14367{
14368 xmlParserInputBufferPtr input;
14369 xmlParserInputPtr stream;
14370
14371 if (ioread == NULL)
14372 return (NULL);
14373 if (ctxt == NULL)
14374 return (NULL);
14375
14376 xmlCtxtReset(ctxt);
14377
14378 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14379 XML_CHAR_ENCODING_NONE);
14380 if (input == NULL)
14381 return (NULL);
14382 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14383 if (stream == NULL) {
14384 xmlFreeParserInputBuffer(input);
14385 return (NULL);
14386 }
14387 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014388 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014389}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014390
14391#define bottom_parser
14392#include "elfgcchack.h"