blob: 44685b4a78aa8b2c4cddba8c98b133cb5d20f54c [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser itself; it is a
 * safety boundary.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array slots are const: this is a fixed lookup table that is only
 * ever read.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
825/************************************************************************
826 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000827 * SAX2 defaulted attributes handling *
828 * *
829 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
Daniel Veillard6a31b832008-03-26 14:06:44 +0000981 /*
982 * Allows to detect attribute redefinitions
983 */
984 if (ctxt->attsSpecial != NULL) {
985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
986 return;
987 }
988
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991 if (ctxt->attsDefault == NULL)
992 goto mem_error;
993 }
994
995 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000996 * split the element name into prefix:localname , the string found
997 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000998 */
999 name = xmlSplitQName3(fullname, &len);
1000 if (name == NULL) {
1001 name = xmlDictLookup(ctxt->dict, fullname, -1);
1002 prefix = NULL;
1003 } else {
1004 name = xmlDictLookup(ctxt->dict, name, -1);
1005 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1006 }
1007
1008 /*
1009 * make sure there is some storage
1010 */
1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012 if (defaults == NULL) {
1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001014 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015 if (defaults == NULL)
1016 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001018 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001019 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1020 defaults, NULL) < 0) {
1021 xmlFree(defaults);
1022 goto mem_error;
1023 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001025 xmlDefAttrsPtr temp;
1026
1027 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001029 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001030 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001031 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001033 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1034 defaults, NULL) < 0) {
1035 xmlFree(defaults);
1036 goto mem_error;
1037 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001038 }
1039
1040 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001041 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001042 * are within the DTD and hen not associated to namespace names.
1043 */
1044 name = xmlSplitQName3(fullattr, &len);
1045 if (name == NULL) {
1046 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1047 prefix = NULL;
1048 } else {
1049 name = xmlDictLookup(ctxt->dict, name, -1);
1050 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1051 }
1052
1053 defaults->values[4 * defaults->nbAttrs] = name;
1054 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1055 /* intern the string and precompute the end */
1056 len = xmlStrlen(value);
1057 value = xmlDictLookup(ctxt->dict, value, len);
1058 defaults->values[4 * defaults->nbAttrs + 2] = value;
1059 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1060 defaults->nbAttrs++;
1061
1062 return;
1063
1064mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001066 return;
1067}
1068
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001069/**
1070 * xmlAddSpecialAttr:
1071 * @ctxt: an XML parser context
1072 * @fullname: the element fullname
1073 * @fullattr: the attribute fullname
1074 * @type: the attribute type
1075 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001076 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001077 */
1078static void
1079xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1080 const xmlChar *fullname,
1081 const xmlChar *fullattr,
1082 int type)
1083{
1084 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001085 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001086 if (ctxt->attsSpecial == NULL)
1087 goto mem_error;
1088 }
1089
Daniel Veillardac4118d2008-01-11 05:27:32 +00001090 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1091 return;
1092
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001093 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1094 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001095 return;
1096
1097mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001098 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001099 return;
1100}
1101
Daniel Veillard4432df22003-09-28 18:58:27 +00001102/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001103 * xmlCleanSpecialAttrCallback:
1104 *
1105 * Removes CDATA attributes from the special attribute table
1106 */
1107static void
1108xmlCleanSpecialAttrCallback(void *payload, void *data,
1109 const xmlChar *fullname, const xmlChar *fullattr,
1110 const xmlChar *unused ATTRIBUTE_UNUSED) {
1111 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1112
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001113 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001114 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1115 }
1116}
1117
1118/**
1119 * xmlCleanSpecialAttr:
1120 * @ctxt: an XML parser context
1121 *
1122 * Trim the list of attributes defined to remove all those of type
1123 * CDATA as they are not special. This call should be done when finishing
1124 * to parse the DTD and before starting to parse the document root.
1125 */
1126static void
1127xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1128{
1129 if (ctxt->attsSpecial == NULL)
1130 return;
1131
1132 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1133
1134 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1135 xmlHashFree(ctxt->attsSpecial, NULL);
1136 ctxt->attsSpecial = NULL;
1137 }
1138 return;
1139}
1140
1141/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001142 * xmlCheckLanguageID:
1143 * @lang: pointer to the string value
1144 *
1145 * Checks that the value conforms to the LanguageID production:
1146 *
1147 * NOTE: this is somewhat deprecated, those productions were removed from
1148 * the XML Second edition.
1149 *
1150 * [33] LanguageID ::= Langcode ('-' Subcode)*
1151 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1152 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1153 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1154 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1155 * [38] Subcode ::= ([a-z] | [A-Z])+
1156 *
1157 * Returns 1 if correct 0 otherwise
1158 **/
1159int
1160xmlCheckLanguageID(const xmlChar * lang)
1161{
1162 const xmlChar *cur = lang;
1163
1164 if (cur == NULL)
1165 return (0);
1166 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1167 ((cur[0] == 'I') && (cur[1] == '-'))) {
1168 /*
1169 * IANA code
1170 */
1171 cur += 2;
1172 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1173 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1174 cur++;
1175 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1176 ((cur[0] == 'X') && (cur[1] == '-'))) {
1177 /*
1178 * User code
1179 */
1180 cur += 2;
1181 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1182 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1183 cur++;
1184 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1185 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1186 /*
1187 * ISO639
1188 */
1189 cur++;
1190 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1191 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1192 cur++;
1193 else
1194 return (0);
1195 } else
1196 return (0);
1197 while (cur[0] != 0) { /* non input consuming */
1198 if (cur[0] != '-')
1199 return (0);
1200 cur++;
1201 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1202 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1203 cur++;
1204 else
1205 return (0);
1206 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1207 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1208 cur++;
1209 }
1210 return (1);
1211}
1212
Owen Taylor3473f882001-02-23 17:55:21 +00001213/************************************************************************
1214 * *
1215 * Parser stacks related functions and macros *
1216 * *
1217 ************************************************************************/
1218
1219xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1220 const xmlChar ** str);
1221
Daniel Veillard0fb18932003-09-07 09:14:37 +00001222#ifdef SAX2
1223/**
1224 * nsPush:
1225 * @ctxt: an XML parser context
1226 * @prefix: the namespace prefix or NULL
1227 * @URL: the namespace name
1228 *
1229 * Pushes a new parser namespace on top of the ns stack
1230 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001231 * Returns -1 in case of error, -2 if the namespace should be discarded
1232 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001233 */
1234static int
1235nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1236{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001237 if (ctxt->options & XML_PARSE_NSCLEAN) {
1238 int i;
1239 for (i = 0;i < ctxt->nsNr;i += 2) {
1240 if (ctxt->nsTab[i] == prefix) {
1241 /* in scope */
1242 if (ctxt->nsTab[i + 1] == URL)
1243 return(-2);
1244 /* out of scope keep it */
1245 break;
1246 }
1247 }
1248 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001249 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1250 ctxt->nsMax = 10;
1251 ctxt->nsNr = 0;
1252 ctxt->nsTab = (const xmlChar **)
1253 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1254 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001256 ctxt->nsMax = 0;
1257 return (-1);
1258 }
1259 } else if (ctxt->nsNr >= ctxt->nsMax) {
1260 ctxt->nsMax *= 2;
1261 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001262 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001263 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1264 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001265 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001266 ctxt->nsMax /= 2;
1267 return (-1);
1268 }
1269 }
1270 ctxt->nsTab[ctxt->nsNr++] = prefix;
1271 ctxt->nsTab[ctxt->nsNr++] = URL;
1272 return (ctxt->nsNr);
1273}
1274/**
1275 * nsPop:
1276 * @ctxt: an XML parser context
1277 * @nr: the number to pop
1278 *
1279 * Pops the top @nr parser prefix/namespace from the ns stack
1280 *
1281 * Returns the number of namespaces removed
1282 */
1283static int
1284nsPop(xmlParserCtxtPtr ctxt, int nr)
1285{
1286 int i;
1287
1288 if (ctxt->nsTab == NULL) return(0);
1289 if (ctxt->nsNr < nr) {
1290 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1291 nr = ctxt->nsNr;
1292 }
1293 if (ctxt->nsNr <= 0)
1294 return (0);
1295
1296 for (i = 0;i < nr;i++) {
1297 ctxt->nsNr--;
1298 ctxt->nsTab[ctxt->nsNr] = NULL;
1299 }
1300 return(nr);
1301}
1302#endif
1303
1304static int
1305xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1306 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001307 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001308 int maxatts;
1309
1310 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001312 atts = (const xmlChar **)
1313 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001314 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001315 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001316 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1317 if (attallocs == NULL) goto mem_error;
1318 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001319 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001320 } else if (nr + 5 > ctxt->maxatts) {
1321 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001322 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1323 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001324 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001325 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001326 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1327 (maxatts / 5) * sizeof(int));
1328 if (attallocs == NULL) goto mem_error;
1329 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001330 ctxt->maxatts = maxatts;
1331 }
1332 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001333mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001334 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001335 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001336}
1337
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001338/**
1339 * inputPush:
1340 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001341 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001342 *
1343 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001344 *
1345 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001346 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001347int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001348inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1349{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001350 if ((ctxt == NULL) || (value == NULL))
1351 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001352 if (ctxt->inputNr >= ctxt->inputMax) {
1353 ctxt->inputMax *= 2;
1354 ctxt->inputTab =
1355 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1356 ctxt->inputMax *
1357 sizeof(ctxt->inputTab[0]));
1358 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001359 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001360 return (0);
1361 }
1362 }
1363 ctxt->inputTab[ctxt->inputNr] = value;
1364 ctxt->input = value;
1365 return (ctxt->inputNr++);
1366}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001367/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001368 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001369 * @ctxt: an XML parser context
1370 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001372 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001373 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001374 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001375xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001376inputPop(xmlParserCtxtPtr ctxt)
1377{
1378 xmlParserInputPtr ret;
1379
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001380 if (ctxt == NULL)
1381 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001383 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384 ctxt->inputNr--;
1385 if (ctxt->inputNr > 0)
1386 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1387 else
1388 ctxt->input = NULL;
1389 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001390 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001391 return (ret);
1392}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001393/**
1394 * nodePush:
1395 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001396 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001397 *
1398 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001399 *
1400 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001401 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001402int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1404{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001405 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001406 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001407 xmlNodePtr *tmp;
1408
1409 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1410 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001411 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001412 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001413 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001414 return (0);
1415 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001416 ctxt->nodeTab = tmp;
1417 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001418 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001419 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001420 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001421 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1422 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001423 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001424 return(0);
1425 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 ctxt->nodeTab[ctxt->nodeNr] = value;
1427 ctxt->node = value;
1428 return (ctxt->nodeNr++);
1429}
1430/**
1431 * nodePop:
1432 * @ctxt: an XML parser context
1433 *
1434 * Pops the top element node from the node stack
1435 *
1436 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001438xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001439nodePop(xmlParserCtxtPtr ctxt)
1440{
1441 xmlNodePtr ret;
1442
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001443 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001444 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001445 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001446 ctxt->nodeNr--;
1447 if (ctxt->nodeNr > 0)
1448 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1449 else
1450 ctxt->node = NULL;
1451 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001452 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001453 return (ret);
1454}
Daniel Veillarda2351322004-06-27 12:08:10 +00001455
1456#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001457/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001458 * nameNsPush:
1459 * @ctxt: an XML parser context
1460 * @value: the element name
1461 * @prefix: the element prefix
1462 * @URI: the element namespace name
1463 *
1464 * Pushes a new element name/prefix/URL on top of the name stack
1465 *
1466 * Returns -1 in case of error, the index in the stack otherwise
1467 */
1468static int
1469nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1470 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1471{
1472 if (ctxt->nameNr >= ctxt->nameMax) {
1473 const xmlChar * *tmp;
1474 void **tmp2;
1475 ctxt->nameMax *= 2;
1476 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1477 ctxt->nameMax *
1478 sizeof(ctxt->nameTab[0]));
1479 if (tmp == NULL) {
1480 ctxt->nameMax /= 2;
1481 goto mem_error;
1482 }
1483 ctxt->nameTab = tmp;
1484 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1485 ctxt->nameMax * 3 *
1486 sizeof(ctxt->pushTab[0]));
1487 if (tmp2 == NULL) {
1488 ctxt->nameMax /= 2;
1489 goto mem_error;
1490 }
1491 ctxt->pushTab = tmp2;
1492 }
1493 ctxt->nameTab[ctxt->nameNr] = value;
1494 ctxt->name = value;
1495 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1496 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001497 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001498 return (ctxt->nameNr++);
1499mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001500 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001501 return (-1);
1502}
1503/**
1504 * nameNsPop:
1505 * @ctxt: an XML parser context
1506 *
1507 * Pops the top element/prefix/URI name from the name stack
1508 *
1509 * Returns the name just removed
1510 */
1511static const xmlChar *
1512nameNsPop(xmlParserCtxtPtr ctxt)
1513{
1514 const xmlChar *ret;
1515
1516 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001517 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001518 ctxt->nameNr--;
1519 if (ctxt->nameNr > 0)
1520 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1521 else
1522 ctxt->name = NULL;
1523 ret = ctxt->nameTab[ctxt->nameNr];
1524 ctxt->nameTab[ctxt->nameNr] = NULL;
1525 return (ret);
1526}
Daniel Veillarda2351322004-06-27 12:08:10 +00001527#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001528
1529/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001530 * namePush:
1531 * @ctxt: an XML parser context
1532 * @value: the element name
1533 *
1534 * Pushes a new element name on top of the name stack
1535 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001536 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001537 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001538int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001539namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001540{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001541 if (ctxt == NULL) return (-1);
1542
Daniel Veillard1c732d22002-11-30 11:22:59 +00001543 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001544 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001546 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001547 ctxt->nameMax *
1548 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001549 if (tmp == NULL) {
1550 ctxt->nameMax /= 2;
1551 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001552 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001553 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001554 }
1555 ctxt->nameTab[ctxt->nameNr] = value;
1556 ctxt->name = value;
1557 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001558mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001559 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001560 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001561}
1562/**
1563 * namePop:
1564 * @ctxt: an XML parser context
1565 *
1566 * Pops the top element name from the name stack
1567 *
1568 * Returns the name just removed
1569 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001570const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571namePop(xmlParserCtxtPtr ctxt)
1572{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001573 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001574
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001575 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1576 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001577 ctxt->nameNr--;
1578 if (ctxt->nameNr > 0)
1579 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1580 else
1581 ctxt->name = NULL;
1582 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001583 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001584 return (ret);
1585}
Owen Taylor3473f882001-02-23 17:55:21 +00001586
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001587static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001588 if (ctxt->spaceNr >= ctxt->spaceMax) {
1589 ctxt->spaceMax *= 2;
1590 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1591 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1592 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001593 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001594 return(0);
1595 }
1596 }
1597 ctxt->spaceTab[ctxt->spaceNr] = val;
1598 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1599 return(ctxt->spaceNr++);
1600}
1601
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001602static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001603 int ret;
1604 if (ctxt->spaceNr <= 0) return(0);
1605 ctxt->spaceNr--;
1606 if (ctxt->spaceNr > 0)
1607 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1608 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001609 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ret = ctxt->spaceTab[ctxt->spaceNr];
1611 ctxt->spaceTab[ctxt->spaceNr] = -1;
1612 return(ret);
1613}
1614
1615/*
1616 * Macros for accessing the content. Those should be used only by the parser,
1617 * and not exported.
1618 *
1619 * Dirty macros, i.e. one often need to make assumption on the context to
1620 * use them
1621 *
1622 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1623 * To be used with extreme caution since operations consuming
1624 * characters may move the input buffer to a different location !
1625 * CUR returns the current xmlChar value, i.e. an 8 bit value.
1626 * This should be used internally by the parser
1627 * only to compare to ASCII values, otherwise it would break when
1628 * running with UTF-8 encoding.
1629 * RAW same as CUR but in the input buffer, bypass any token
1630 * extraction that may have been done
1631 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1632 * to compare on ASCII based substring.
1633 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001634 * strings without newlines within the parser.
1635 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1636 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001637 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1638 *
1639 * NEXT Skip to the next character, this does the proper decoding
1640 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001641 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001642 * CUR_CHAR(l) returns the current unicode character (int), set l
1643 * to the number of xmlChars used for the encoding [0-5].
1644 * CUR_SCHAR same but operate on a string instead of the context
1645 * COPY_BUF copy the current unicode char to the target buffer, increment
1646 * the index
1647 * GROW, SHRINK handling of input buffers
1648 */
1649
/*
 * Raw accessors on the current input; all of them expect a variable
 * named ctxt to be in scope at the point of use.
 */
/* xmlChar at the current position of the input buffer */
#define RAW (ctxt->input->cur[0])
/* same value as RAW */
#define CUR RAW
/* xmlChar located (val) positions after the current one */
#define NXT(val) (ctxt->input->cur[(val)])
/* the current position pointer itself (usable as an lvalue) */
#define CUR_PTR (ctxt->input->cur)
1654
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at address s
 * are c1 ... cn, compared as unsigned char values from left to right
 * (the && chain stops at the first mismatch).
 *
 * Every argument is parenthesized so that an expression such as
 * "ptr + 1" is converted to a byte pointer as a whole before being
 * indexed. Note that s is still evaluated once per compared byte, so it
 * must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1672
/*
 * SKIP(val): advance the current input by (val) xmlChars, adding the
 * same amount to ctxt->nbChars and to the column counter (the line
 * counter is not touched). If the new current char is '%',
 * xmlParserHandlePEReference() is called; if it is 0 and the input
 * cannot be grown, xmlPopInput() is called.
 * (val) is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1680
/*
 * SKIPL(val): advance the current input by (val) xmlChars one at a
 * time, keeping line/column information up to date: a '\n' increments
 * the line and resets the column to 1, any other char increments the
 * column. ctxt->nbChars is incremented for each char skipped.
 * Afterwards, if the current char is '%', xmlParserHandlePEReference()
 * is called; if it is 0 and the input cannot be grown, xmlPopInput()
 * is called.
 *
 * (val) is parenthesized so that any expression can be passed; it is
 * re-evaluated on every iteration of the loop.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1695
Daniel Veillarda880b122003-04-21 21:36:41 +00001696#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001697 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1698 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001699 xmlSHRINK (ctxt);
1700
1701static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1702 xmlParserInputShrink(ctxt->input);
1703 if ((*ctxt->input->cur == 0) &&
1704 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1705 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001706 }
Owen Taylor3473f882001-02-23 17:55:21 +00001707
Daniel Veillarda880b122003-04-21 21:36:41 +00001708#define GROW if ((ctxt->progressive == 0) && \
1709 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001710 xmlGROW (ctxt);
1711
1712static void xmlGROW (xmlParserCtxtPtr ctxt) {
1713 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1714 if ((*ctxt->input->cur == 0) &&
1715 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1716 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001717}
Owen Taylor3473f882001-02-23 17:55:21 +00001718
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it in the column and
 * in ctxt->nbChars (the line counter is not touched); when the new
 * current char is 0, try to read more input.
 */
#define NEXT1 {                                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
        ctxt->input->col++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }
1730
/*
 * NEXTL(l): step over the current character, which occupies (l)
 * xmlChars. A '\n' increments the line and resets the column to 1, any
 * other character increments the column by one. If the new current
 * char is '%', xmlParserHandlePEReference() is called.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)
1738
/* CUR_CHAR(l): xmlCurrentChar() on the current context, l receives the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s, l receives the length */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: a character of length 1 is stored directly as one xmlChar,
 * anything else goes through xmlCopyCharMultiByte(), whose return value
 * is added to i.
 *
 * All arguments are parenthesized so that expressions can be passed.
 * The expansion is an if/else statement without its final ';'; i must
 * be a modifiable lvalue.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001745
1746/**
1747 * xmlSkipBlankChars:
1748 * @ctxt: the XML parser context
1749 *
1750 * skip all blanks character found at that point in the input streams.
1751 * It pops up finished entities in the process if allowable at that point.
1752 *
1753 * Returns the number of space chars skipped
1754 */
1755
1756int
1757xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001758 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001759
1760 /*
1761 * It's Okay to use CUR/NEXT here since all the blanks are on
1762 * the ASCII range.
1763 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001764 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1765 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001766 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001767 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001768 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001769 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001770 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001771 if (*cur == '\n') {
1772 ctxt->input->line++; ctxt->input->col = 1;
1773 }
1774 cur++;
1775 res++;
1776 if (*cur == 0) {
1777 ctxt->input->cur = cur;
1778 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1779 cur = ctxt->input->cur;
1780 }
1781 }
1782 ctxt->input->cur = cur;
1783 } else {
1784 int cur;
1785 do {
1786 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001787 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001788 NEXT;
1789 cur = CUR;
1790 res++;
1791 }
1792 while ((cur == 0) && (ctxt->inputNr > 1) &&
1793 (ctxt->instate != XML_PARSER_COMMENT)) {
1794 xmlPopInput(ctxt);
1795 cur = CUR;
1796 }
1797 /*
1798 * Need to handle support of entities branching here
1799 */
1800 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1801 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1802 }
Owen Taylor3473f882001-02-23 17:55:21 +00001803 return(res);
1804}
1805
1806/************************************************************************
1807 * *
1808 * Commodity functions to handle entities *
1809 * *
1810 ************************************************************************/
1811
1812/**
1813 * xmlPopInput:
1814 * @ctxt: an XML parser context
1815 *
1816 * xmlPopInput: the current input pointed by ctxt->input came to an end
1817 * pop it and return the next char.
1818 *
1819 * Returns the current xmlChar in the parser context
1820 */
1821xmlChar
1822xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001823 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001824 if (xmlParserDebugEntities)
1825 xmlGenericError(xmlGenericErrorContext,
1826 "Popping input %d\n", ctxt->inputNr);
1827 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001828 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001829 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1830 return(xmlPopInput(ctxt));
1831 return(CUR);
1832}
1833
1834/**
1835 * xmlPushInput:
1836 * @ctxt: an XML parser context
1837 * @input: an XML parser input fragment (entity, XML fragment ...).
1838 *
1839 * xmlPushInput: switch to a new input stream which is stacked on top
1840 * of the previous one(s).
1841 */
1842void
1843xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1844 if (input == NULL) return;
1845
1846 if (xmlParserDebugEntities) {
1847 if ((ctxt->input != NULL) && (ctxt->input->filename))
1848 xmlGenericError(xmlGenericErrorContext,
1849 "%s(%d): ", ctxt->input->filename,
1850 ctxt->input->line);
1851 xmlGenericError(xmlGenericErrorContext,
1852 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1853 }
1854 inputPush(ctxt, input);
1855 GROW;
1856}
1857
1858/**
1859 * xmlParseCharRef:
1860 * @ctxt: an XML parser context
1861 *
1862 * parse Reference declarations
1863 *
1864 * [66] CharRef ::= '&#' [0-9]+ ';' |
1865 * '&#x' [0-9a-fA-F]+ ';'
1866 *
1867 * [ WFC: Legal Character ]
1868 * Characters referred to using character references must match the
1869 * production for Char.
1870 *
1871 * Returns the value parsed (as an int), 0 in case of error
1872 */
1873int
1874xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001875 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001876 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001877 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001878
Owen Taylor3473f882001-02-23 17:55:21 +00001879 /*
1880 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1881 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001882 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001883 (NXT(2) == 'x')) {
1884 SKIP(3);
1885 GROW;
1886 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001887 if (count++ > 20) {
1888 count = 0;
1889 GROW;
1890 }
1891 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001892 val = val * 16 + (CUR - '0');
1893 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1894 val = val * 16 + (CUR - 'a') + 10;
1895 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1896 val = val * 16 + (CUR - 'A') + 10;
1897 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001898 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001899 val = 0;
1900 break;
1901 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001902 if (val > 0x10FFFF)
1903 outofrange = val;
1904
Owen Taylor3473f882001-02-23 17:55:21 +00001905 NEXT;
1906 count++;
1907 }
1908 if (RAW == ';') {
1909 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001910 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001911 ctxt->nbChars ++;
1912 ctxt->input->cur++;
1913 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001914 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001915 SKIP(2);
1916 GROW;
1917 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001918 if (count++ > 20) {
1919 count = 0;
1920 GROW;
1921 }
1922 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001923 val = val * 10 + (CUR - '0');
1924 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001925 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001926 val = 0;
1927 break;
1928 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001929 if (val > 0x10FFFF)
1930 outofrange = val;
1931
Owen Taylor3473f882001-02-23 17:55:21 +00001932 NEXT;
1933 count++;
1934 }
1935 if (RAW == ';') {
1936 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001937 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001938 ctxt->nbChars ++;
1939 ctxt->input->cur++;
1940 }
1941 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001942 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001943 }
1944
1945 /*
1946 * [ WFC: Legal Character ]
1947 * Characters referred to using character references must match the
1948 * production for Char.
1949 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001950 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001951 return(val);
1952 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001953 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1954 "xmlParseCharRef: invalid xmlChar value %d\n",
1955 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001956 }
1957 return(0);
1958}
1959
1960/**
1961 * xmlParseStringCharRef:
1962 * @ctxt: an XML parser context
1963 * @str: a pointer to an index in the string
1964 *
1965 * parse Reference declarations, variant parsing from a string rather
1966 * than an an input flow.
1967 *
1968 * [66] CharRef ::= '&#' [0-9]+ ';' |
1969 * '&#x' [0-9a-fA-F]+ ';'
1970 *
1971 * [ WFC: Legal Character ]
1972 * Characters referred to using character references must match the
1973 * production for Char.
1974 *
1975 * Returns the value parsed (as an int), 0 in case of error, str will be
1976 * updated to the current value of the index
1977 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001978static int
Owen Taylor3473f882001-02-23 17:55:21 +00001979xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1980 const xmlChar *ptr;
1981 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001982 unsigned int val = 0;
1983 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001984
1985 if ((str == NULL) || (*str == NULL)) return(0);
1986 ptr = *str;
1987 cur = *ptr;
1988 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1989 ptr += 3;
1990 cur = *ptr;
1991 while (cur != ';') { /* Non input consuming loop */
1992 if ((cur >= '0') && (cur <= '9'))
1993 val = val * 16 + (cur - '0');
1994 else if ((cur >= 'a') && (cur <= 'f'))
1995 val = val * 16 + (cur - 'a') + 10;
1996 else if ((cur >= 'A') && (cur <= 'F'))
1997 val = val * 16 + (cur - 'A') + 10;
1998 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001999 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002000 val = 0;
2001 break;
2002 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002003 if (val > 0x10FFFF)
2004 outofrange = val;
2005
Owen Taylor3473f882001-02-23 17:55:21 +00002006 ptr++;
2007 cur = *ptr;
2008 }
2009 if (cur == ';')
2010 ptr++;
2011 } else if ((cur == '&') && (ptr[1] == '#')){
2012 ptr += 2;
2013 cur = *ptr;
2014 while (cur != ';') { /* Non input consuming loops */
2015 if ((cur >= '0') && (cur <= '9'))
2016 val = val * 10 + (cur - '0');
2017 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002018 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002019 val = 0;
2020 break;
2021 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002022 if (val > 0x10FFFF)
2023 outofrange = val;
2024
Owen Taylor3473f882001-02-23 17:55:21 +00002025 ptr++;
2026 cur = *ptr;
2027 }
2028 if (cur == ';')
2029 ptr++;
2030 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002031 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002032 return(0);
2033 }
2034 *str = ptr;
2035
2036 /*
2037 * [ WFC: Legal Character ]
2038 * Characters referred to using character references must match the
2039 * production for Char.
2040 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002041 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002042 return(val);
2043 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002044 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2045 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2046 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 }
2048 return(0);
2049}
2050
2051/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002052 * xmlNewBlanksWrapperInputStream:
2053 * @ctxt: an XML parser context
2054 * @entity: an Entity pointer
2055 *
2056 * Create a new input stream for wrapping
2057 * blanks around a PEReference
2058 *
2059 * Returns the new input stream or NULL
2060 */
2061
2062static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2063
Daniel Veillardf4862f02002-09-10 11:13:43 +00002064static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002065xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2066 xmlParserInputPtr input;
2067 xmlChar *buffer;
2068 size_t length;
2069 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2071 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002072 return(NULL);
2073 }
2074 if (xmlParserDebugEntities)
2075 xmlGenericError(xmlGenericErrorContext,
2076 "new blanks wrapper for entity: %s\n", entity->name);
2077 input = xmlNewInputStream(ctxt);
2078 if (input == NULL) {
2079 return(NULL);
2080 }
2081 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002082 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002083 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002084 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002085 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002086 return(NULL);
2087 }
2088 buffer [0] = ' ';
2089 buffer [1] = '%';
2090 buffer [length-3] = ';';
2091 buffer [length-2] = ' ';
2092 buffer [length-1] = 0;
2093 memcpy(buffer + 2, entity->name, length - 5);
2094 input->free = deallocblankswrapper;
2095 input->base = buffer;
2096 input->cur = buffer;
2097 input->length = length;
2098 input->end = &buffer[length];
2099 return(input);
2100}
2101
2102/**
Owen Taylor3473f882001-02-23 17:55:21 +00002103 * xmlParserHandlePEReference:
2104 * @ctxt: the parser context
2105 *
2106 * [69] PEReference ::= '%' Name ';'
2107 *
2108 * [ WFC: No Recursion ]
2109 * A parsed entity must not contain a recursive
2110 * reference to itself, either directly or indirectly.
2111 *
2112 * [ WFC: Entity Declared ]
2113 * In a document without any DTD, a document with only an internal DTD
2114 * subset which contains no parameter entity references, or a document
2115 * with "standalone='yes'", ... ... The declaration of a parameter
2116 * entity must precede any reference to it...
2117 *
2118 * [ VC: Entity Declared ]
2119 * In a document with an external subset or external parameter entities
2120 * with "standalone='no'", ... ... The declaration of a parameter entity
2121 * must precede any reference to it...
2122 *
2123 * [ WFC: In DTD ]
2124 * Parameter-entity references may only appear in the DTD.
2125 * NOTE: misleading but this is handled.
2126 *
2127 * A PEReference may have been detected in the current input stream
2128 * the handling is done accordingly to
2129 * http://www.w3.org/TR/REC-xml#entproc
2130 * i.e.
2131 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002132 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002133 */
2134void
2135xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002136 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002137 xmlEntityPtr entity = NULL;
2138 xmlParserInputPtr input;
2139
Owen Taylor3473f882001-02-23 17:55:21 +00002140 if (RAW != '%') return;
2141 switch(ctxt->instate) {
2142 case XML_PARSER_CDATA_SECTION:
2143 return;
2144 case XML_PARSER_COMMENT:
2145 return;
2146 case XML_PARSER_START_TAG:
2147 return;
2148 case XML_PARSER_END_TAG:
2149 return;
2150 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002151 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002152 return;
2153 case XML_PARSER_PROLOG:
2154 case XML_PARSER_START:
2155 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002156 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002157 return;
2158 case XML_PARSER_ENTITY_DECL:
2159 case XML_PARSER_CONTENT:
2160 case XML_PARSER_ATTRIBUTE_VALUE:
2161 case XML_PARSER_PI:
2162 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002163 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002164 /* we just ignore it there */
2165 return;
2166 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002167 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002168 return;
2169 case XML_PARSER_ENTITY_VALUE:
2170 /*
2171 * NOTE: in the case of entity values, we don't do the
2172 * substitution here since we need the literal
2173 * entity value to be able to save the internal
2174 * subset of the document.
2175 * This will be handled by xmlStringDecodeEntities
2176 */
2177 return;
2178 case XML_PARSER_DTD:
2179 /*
2180 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2181 * In the internal DTD subset, parameter-entity references
2182 * can occur only where markup declarations can occur, not
2183 * within markup declarations.
2184 * In that case this is handled in xmlParseMarkupDecl
2185 */
2186 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2187 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002188 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002189 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002190 break;
2191 case XML_PARSER_IGNORE:
2192 return;
2193 }
2194
2195 NEXT;
2196 name = xmlParseName(ctxt);
2197 if (xmlParserDebugEntities)
2198 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002199 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002200 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002201 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002202 } else {
2203 if (RAW == ';') {
2204 NEXT;
2205 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2206 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2207 if (entity == NULL) {
2208
2209 /*
2210 * [ WFC: Entity Declared ]
2211 * In a document without any DTD, a document with only an
2212 * internal DTD subset which contains no parameter entity
2213 * references, or a document with "standalone='yes'", ...
2214 * ... The declaration of a parameter entity must precede
2215 * any reference to it...
2216 */
2217 if ((ctxt->standalone == 1) ||
2218 ((ctxt->hasExternalSubset == 0) &&
2219 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002220 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002221 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002222 } else {
2223 /*
2224 * [ VC: Entity Declared ]
2225 * In a document with an external subset or external
2226 * parameter entities with "standalone='no'", ...
2227 * ... The declaration of a parameter entity must precede
2228 * any reference to it...
2229 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002230 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2231 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2232 "PEReference: %%%s; not found\n",
2233 name);
2234 } else
2235 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2236 "PEReference: %%%s; not found\n",
2237 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002238 ctxt->valid = 0;
2239 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002240 } else if (ctxt->input->free != deallocblankswrapper) {
2241 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2242 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002243 } else {
2244 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2245 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002246 xmlChar start[4];
2247 xmlCharEncoding enc;
2248
Owen Taylor3473f882001-02-23 17:55:21 +00002249 /*
2250 * handle the extra spaces added before and after
2251 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002252 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002253 */
2254 input = xmlNewEntityInputStream(ctxt, entity);
2255 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002256
2257 /*
2258 * Get the 4 first bytes and decode the charset
2259 * if enc != XML_CHAR_ENCODING_NONE
2260 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002261 * Note that, since we may have some non-UTF8
2262 * encoding (like UTF16, bug 135229), the 'length'
2263 * is not known, but we can calculate based upon
2264 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002265 */
2266 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002267 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002268 start[0] = RAW;
2269 start[1] = NXT(1);
2270 start[2] = NXT(2);
2271 start[3] = NXT(3);
2272 enc = xmlDetectCharEncoding(start, 4);
2273 if (enc != XML_CHAR_ENCODING_NONE) {
2274 xmlSwitchEncoding(ctxt, enc);
2275 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002276 }
2277
Owen Taylor3473f882001-02-23 17:55:21 +00002278 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002279 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2280 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002281 xmlParseTextDecl(ctxt);
2282 }
Owen Taylor3473f882001-02-23 17:55:21 +00002283 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002284 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2285 "PEReference: %s is not a parameter entity\n",
2286 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002287 }
2288 }
2289 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
Owen Taylor3473f882001-02-23 17:55:21 +00002292 }
2293}
2294
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates <buffer> to that many xmlChars. When the reallocation
 * fails <buffer> is left as it was and control jumps to the mem_error
 * label, which the enclosing function has to provide.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp != NULL)                                                    \
        buffer = tmp;                                                   \
    else                                                                \
        goto mem_error;                                                 \
}
2306
2307/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002308 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002309 * @ctxt: the parser context
2310 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002311 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002312 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2313 * @end: an end marker xmlChar, 0 if none
2314 * @end2: an end marker xmlChar, 0 if none
2315 * @end3: an end marker xmlChar, 0 if none
2316 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002317 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002318 *
2319 * [67] Reference ::= EntityRef | CharRef
2320 *
2321 * [69] PEReference ::= '%' Name ';'
2322 *
2323 * Returns A newly allocated string with the substitution done. The caller
2324 * must deallocate it !
2325 */
2326xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002327xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2328 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002329 xmlChar *buffer = NULL;
2330 int buffer_size = 0;
2331
2332 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002333 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002334 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002335 xmlEntityPtr ent;
2336 int c,l;
2337 int nbchars = 0;
2338
Daniel Veillarda82b1822004-11-08 16:24:57 +00002339 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002340 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002341 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002342
2343 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 return(NULL);
2346 }
2347
2348 /*
2349 * allocate a translation buffer.
2350 */
2351 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002352 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002353 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002354
2355 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002356 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002357 * we are operating on already parsed values.
2358 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002359 if (str < last)
2360 c = CUR_SCHAR(str, l);
2361 else
2362 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002363 while ((c != 0) && (c != end) && /* non input consuming loop */
2364 (c != end2) && (c != end3)) {
2365
2366 if (c == 0) break;
2367 if ((c == '&') && (str[1] == '#')) {
2368 int val = xmlParseStringCharRef(ctxt, &str);
2369 if (val != 0) {
2370 COPY_BUF(0,buffer,nbchars,val);
2371 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002372 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2373 growBuffer(buffer);
2374 }
Owen Taylor3473f882001-02-23 17:55:21 +00002375 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2376 if (xmlParserDebugEntities)
2377 xmlGenericError(xmlGenericErrorContext,
2378 "String decoding Entity Reference: %.30s\n",
2379 str);
2380 ent = xmlParseStringEntityRef(ctxt, &str);
2381 if ((ent != NULL) &&
2382 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2383 if (ent->content != NULL) {
2384 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002385 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2386 growBuffer(buffer);
2387 }
Owen Taylor3473f882001-02-23 17:55:21 +00002388 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002389 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2390 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002391 }
2392 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002393 ctxt->depth++;
2394 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2395 0, 0, 0);
2396 ctxt->depth--;
2397 if (rep != NULL) {
2398 current = rep;
2399 while (*current != 0) { /* non input consuming loop */
2400 buffer[nbchars++] = *current++;
2401 if (nbchars >
2402 buffer_size - XML_PARSER_BUFFER_SIZE) {
2403 growBuffer(buffer);
2404 }
2405 }
2406 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002407 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002408 }
2409 } else if (ent != NULL) {
2410 int i = xmlStrlen(ent->name);
2411 const xmlChar *cur = ent->name;
2412
2413 buffer[nbchars++] = '&';
2414 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2415 growBuffer(buffer);
2416 }
2417 for (;i > 0;i--)
2418 buffer[nbchars++] = *cur++;
2419 buffer[nbchars++] = ';';
2420 }
2421 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2422 if (xmlParserDebugEntities)
2423 xmlGenericError(xmlGenericErrorContext,
2424 "String decoding PE Reference: %.30s\n", str);
2425 ent = xmlParseStringPEReference(ctxt, &str);
2426 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002427 if (ent->content == NULL) {
2428 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2429 }
2430 }
Owen Taylor3473f882001-02-23 17:55:21 +00002431 ctxt->depth++;
2432 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2433 0, 0, 0);
2434 ctxt->depth--;
2435 if (rep != NULL) {
2436 current = rep;
2437 while (*current != 0) { /* non input consuming loop */
2438 buffer[nbchars++] = *current++;
2439 if (nbchars >
2440 buffer_size - XML_PARSER_BUFFER_SIZE) {
2441 growBuffer(buffer);
2442 }
2443 }
2444 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002445 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002446 }
2447 }
2448 } else {
2449 COPY_BUF(l,buffer,nbchars,c);
2450 str += l;
2451 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2452 growBuffer(buffer);
2453 }
2454 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002455 if (str < last)
2456 c = CUR_SCHAR(str, l);
2457 else
2458 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002459 }
2460 buffer[nbchars++] = 0;
2461 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002462
2463mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002464 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002465 if (rep != NULL)
2466 xmlFree(rep);
2467 if (buffer != NULL)
2468 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002469 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002470}
2471
Daniel Veillarde57ec792003-09-10 10:50:59 +00002472/**
2473 * xmlStringDecodeEntities:
2474 * @ctxt: the parser context
2475 * @str: the input string
2476 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2477 * @end: an end marker xmlChar, 0 if none
2478 * @end2: an end marker xmlChar, 0 if none
2479 * @end3: an end marker xmlChar, 0 if none
2480 *
2481 * Takes a entity string content and process to do the adequate substitutions.
2482 *
2483 * [67] Reference ::= EntityRef | CharRef
2484 *
2485 * [69] PEReference ::= '%' Name ';'
2486 *
2487 * Returns A newly allocated string with the substitution done. The caller
2488 * must deallocate it !
2489 */
2490xmlChar *
2491xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2492 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002493 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002494 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2495 end, end2, end3));
2496}
Owen Taylor3473f882001-02-23 17:55:21 +00002497
/************************************************************************
 *                                                                      *
 *                Commodity functions, cleanup needed ?                 *
 *                                                                      *
 ************************************************************************/
2503
2504/**
2505 * areBlanks:
2506 * @ctxt: an XML parser context
2507 * @str: a xmlChar *
2508 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002509 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002510 *
2511 * Is this a sequence of blank chars that one can ignore ?
2512 *
2513 * Returns 1 if ignorable 0 otherwise.
2514 */
2515
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002516static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2517 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002518 int i, ret;
2519 xmlNodePtr lastChild;
2520
Daniel Veillard05c13a22001-09-09 08:38:09 +00002521 /*
2522 * Don't spend time trying to differentiate them, the same callback is
2523 * used !
2524 */
2525 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002526 return(0);
2527
Owen Taylor3473f882001-02-23 17:55:21 +00002528 /*
2529 * Check for xml:space value.
2530 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002531 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2532 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(0);
2534
2535 /*
2536 * Check that the string is made of blanks
2537 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002538 if (blank_chars == 0) {
2539 for (i = 0;i < len;i++)
2540 if (!(IS_BLANK_CH(str[i]))) return(0);
2541 }
Owen Taylor3473f882001-02-23 17:55:21 +00002542
2543 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002544 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002545 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002546 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (ctxt->myDoc != NULL) {
2548 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2549 if (ret == 0) return(1);
2550 if (ret == 1) return(0);
2551 }
2552
2553 /*
2554 * Otherwise, heuristic :-\
2555 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002556 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002557 if ((ctxt->node->children == NULL) &&
2558 (RAW == '<') && (NXT(1) == '/')) return(0);
2559
2560 lastChild = xmlGetLastChild(ctxt->node);
2561 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002562 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2563 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 } else if (xmlNodeIsText(lastChild))
2565 return(0);
2566 else if ((ctxt->node->children != NULL) &&
2567 (xmlNodeIsText(ctxt->node->children)))
2568 return(0);
2569 return(1);
2570}
2571
/************************************************************************
 *                                                                      *
 *                Extra stuff for namespace support                     *
 *        Relates to http://www.w3.org/TR/WD-xml-names                  *
 *                                                                      *
 ************************************************************************/
2578
2579/**
2580 * xmlSplitQName:
2581 * @ctxt: an XML parser context
2582 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002583 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002584 *
2585 * parse an UTF8 encoded XML qualified name string
2586 *
2587 * [NS 5] QName ::= (Prefix ':')? LocalPart
2588 *
2589 * [NS 6] Prefix ::= NCName
2590 *
2591 * [NS 7] LocalPart ::= NCName
2592 *
2593 * Returns the local part, and prefix is updated
2594 * to get the Prefix if any.
2595 */
2596
2597xmlChar *
2598xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2599 xmlChar buf[XML_MAX_NAMELEN + 5];
2600 xmlChar *buffer = NULL;
2601 int len = 0;
2602 int max = XML_MAX_NAMELEN;
2603 xmlChar *ret = NULL;
2604 const xmlChar *cur = name;
2605 int c;
2606
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002607 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002608 *prefix = NULL;
2609
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002610 if (cur == NULL) return(NULL);
2611
Owen Taylor3473f882001-02-23 17:55:21 +00002612#ifndef XML_XML_NAMESPACE
2613 /* xml: prefix is not really a namespace */
2614 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2615 (cur[2] == 'l') && (cur[3] == ':'))
2616 return(xmlStrdup(name));
2617#endif
2618
Daniel Veillard597bc482003-07-24 16:08:28 +00002619 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002620 if (cur[0] == ':')
2621 return(xmlStrdup(name));
2622
2623 c = *cur++;
2624 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2625 buf[len++] = c;
2626 c = *cur++;
2627 }
2628 if (len >= max) {
2629 /*
2630 * Okay someone managed to make a huge name, so he's ready to pay
2631 * for the processing speed.
2632 */
2633 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002634
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002635 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002637 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002638 return(NULL);
2639 }
2640 memcpy(buffer, buf, len);
2641 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2642 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002643 xmlChar *tmp;
2644
Owen Taylor3473f882001-02-23 17:55:21 +00002645 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002646 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002647 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002648 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002649 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002650 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002651 return(NULL);
2652 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002653 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002654 }
2655 buffer[len++] = c;
2656 c = *cur++;
2657 }
2658 buffer[len] = 0;
2659 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002660
Daniel Veillard597bc482003-07-24 16:08:28 +00002661 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002662 if (buffer != NULL)
2663 xmlFree(buffer);
2664 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002665 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002666 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002667
Owen Taylor3473f882001-02-23 17:55:21 +00002668 if (buffer == NULL)
2669 ret = xmlStrndup(buf, len);
2670 else {
2671 ret = buffer;
2672 buffer = NULL;
2673 max = XML_MAX_NAMELEN;
2674 }
2675
2676
2677 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002678 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002680 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002681 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002682 }
Owen Taylor3473f882001-02-23 17:55:21 +00002683 len = 0;
2684
Daniel Veillardbb284f42002-10-16 18:02:47 +00002685 /*
2686 * Check that the first character is proper to start
2687 * a new name
2688 */
2689 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2690 ((c >= 0x41) && (c <= 0x5A)) ||
2691 (c == '_') || (c == ':'))) {
2692 int l;
2693 int first = CUR_SCHAR(cur, l);
2694
2695 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002696 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002697 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002698 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002699 }
2700 }
2701 cur++;
2702
Owen Taylor3473f882001-02-23 17:55:21 +00002703 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2704 buf[len++] = c;
2705 c = *cur++;
2706 }
2707 if (len >= max) {
2708 /*
2709 * Okay someone managed to make a huge name, so he's ready to pay
2710 * for the processing speed.
2711 */
2712 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002713
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002714 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002715 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002716 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 return(NULL);
2718 }
2719 memcpy(buffer, buf, len);
2720 while (c != 0) { /* tested bigname2.xml */
2721 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002722 xmlChar *tmp;
2723
Owen Taylor3473f882001-02-23 17:55:21 +00002724 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002725 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002726 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002727 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002728 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002729 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002730 return(NULL);
2731 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002732 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002733 }
2734 buffer[len++] = c;
2735 c = *cur++;
2736 }
2737 buffer[len] = 0;
2738 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002739
Owen Taylor3473f882001-02-23 17:55:21 +00002740 if (buffer == NULL)
2741 ret = xmlStrndup(buf, len);
2742 else {
2743 ret = buffer;
2744 }
2745 }
2746
2747 return(ret);
2748}
2749
/************************************************************************
 *                                                                      *
 *                The parser itself                                     *
 *        Relates to http://www.w3.org/TR/REC-xml                       *
 *                                                                      *
 ************************************************************************/
2756
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002757static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002758static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002759 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002760
Owen Taylor3473f882001-02-23 17:55:21 +00002761/**
2762 * xmlParseName:
2763 * @ctxt: an XML parser context
2764 *
2765 * parse an XML name.
2766 *
2767 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2768 * CombiningChar | Extender
2769 *
2770 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2771 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002772 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002773 *
2774 * Returns the Name parsed or NULL
2775 */
2776
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002777const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002778xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002780 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002781 int count = 0;
2782
2783 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002784
2785 /*
2786 * Accelerator for simple ASCII names
2787 */
2788 in = ctxt->input->cur;
2789 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2790 ((*in >= 0x41) && (*in <= 0x5A)) ||
2791 (*in == '_') || (*in == ':')) {
2792 in++;
2793 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2794 ((*in >= 0x41) && (*in <= 0x5A)) ||
2795 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002796 (*in == '_') || (*in == '-') ||
2797 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002798 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002799 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002800 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002801 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002802 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002803 ctxt->nbChars += count;
2804 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002805 if (ret == NULL)
2806 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002807 return(ret);
2808 }
2809 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002810 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002811}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002812
Daniel Veillard46de64e2002-05-29 08:21:33 +00002813/**
2814 * xmlParseNameAndCompare:
2815 * @ctxt: an XML parser context
2816 *
2817 * parse an XML name and compares for match
2818 * (specialized for endtag parsing)
2819 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002820 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2821 * and the name for mismatch
2822 */
2823
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002824static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002825xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002826 register const xmlChar *cmp = other;
2827 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002828 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002829
2830 GROW;
2831
2832 in = ctxt->input->cur;
2833 while (*in != 0 && *in == *cmp) {
2834 ++in;
2835 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002836 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002837 }
William M. Brack76e95df2003-10-18 16:20:14 +00002838 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002839 /* success */
2840 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002841 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002842 }
2843 /* failure (or end of input buffer), check with full function */
2844 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002845 /* strings coming from the dictionnary direct compare possible */
2846 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002847 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002848 }
2849 return ret;
2850}
2851
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002852static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002853xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002854 int len = 0, l;
2855 int c;
2856 int count = 0;
2857
2858 /*
2859 * Handler for more complex cases
2860 */
2861 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002862 c = CUR_CHAR(l);
2863 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2864 (!IS_LETTER(c) && (c != '_') &&
2865 (c != ':'))) {
2866 return(NULL);
2867 }
2868
2869 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002870 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002871 (c == '.') || (c == '-') ||
2872 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002873 (IS_COMBINING(c)) ||
2874 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (count++ > 100) {
2876 count = 0;
2877 GROW;
2878 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002879 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002880 NEXTL(l);
2881 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002882 }
Daniel Veillard96688262005-08-23 18:14:12 +00002883 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2884 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002885 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002886}
2887
2888/**
2889 * xmlParseStringName:
2890 * @ctxt: an XML parser context
2891 * @str: a pointer to the string pointer (IN/OUT)
2892 *
2893 * parse an XML name.
2894 *
2895 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2896 * CombiningChar | Extender
2897 *
2898 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2899 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002900 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002901 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002902 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002903 * is updated to the current location in the string.
2904 */
2905
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002906static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002907xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2908 xmlChar buf[XML_MAX_NAMELEN + 5];
2909 const xmlChar *cur = *str;
2910 int len = 0, l;
2911 int c;
2912
2913 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002914 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002915 (c != ':')) {
2916 return(NULL);
2917 }
2918
William M. Brack871611b2003-10-18 04:53:14 +00002919 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002920 (c == '.') || (c == '-') ||
2921 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002922 (IS_COMBINING(c)) ||
2923 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002924 COPY_BUF(l,buf,len,c);
2925 cur += l;
2926 c = CUR_SCHAR(cur, l);
2927 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2928 /*
2929 * Okay someone managed to make a huge name, so he's ready to pay
2930 * for the processing speed.
2931 */
2932 xmlChar *buffer;
2933 int max = len * 2;
2934
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002935 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002936 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002937 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002938 return(NULL);
2939 }
2940 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002941 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002942 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002943 (c == '.') || (c == '-') ||
2944 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002945 (IS_COMBINING(c)) ||
2946 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002947 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002948 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002949 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002951 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002952 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002953 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002954 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002955 return(NULL);
2956 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002957 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 }
2959 COPY_BUF(l,buffer,len,c);
2960 cur += l;
2961 c = CUR_SCHAR(cur, l);
2962 }
2963 buffer[len] = 0;
2964 *str = cur;
2965 return(buffer);
2966 }
2967 }
2968 *str = cur;
2969 return(xmlStrndup(buf, len));
2970}
2971
2972/**
2973 * xmlParseNmtoken:
2974 * @ctxt: an XML parser context
2975 *
2976 * parse an XML Nmtoken.
2977 *
2978 * [7] Nmtoken ::= (NameChar)+
2979 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002980 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002981 *
2982 * Returns the Nmtoken parsed or NULL
2983 */
2984
2985xmlChar *
2986xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2987 xmlChar buf[XML_MAX_NAMELEN + 5];
2988 int len = 0, l;
2989 int c;
2990 int count = 0;
2991
2992 GROW;
2993 c = CUR_CHAR(l);
2994
William M. Brack871611b2003-10-18 04:53:14 +00002995 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002996 (c == '.') || (c == '-') ||
2997 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002998 (IS_COMBINING(c)) ||
2999 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003000 if (count++ > 100) {
3001 count = 0;
3002 GROW;
3003 }
3004 COPY_BUF(l,buf,len,c);
3005 NEXTL(l);
3006 c = CUR_CHAR(l);
3007 if (len >= XML_MAX_NAMELEN) {
3008 /*
3009 * Okay someone managed to make a huge token, so he's ready to pay
3010 * for the processing speed.
3011 */
3012 xmlChar *buffer;
3013 int max = len * 2;
3014
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003015 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003016 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003017 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003018 return(NULL);
3019 }
3020 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003021 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003022 (c == '.') || (c == '-') ||
3023 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003024 (IS_COMBINING(c)) ||
3025 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003026 if (count++ > 100) {
3027 count = 0;
3028 GROW;
3029 }
3030 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003031 xmlChar *tmp;
3032
Owen Taylor3473f882001-02-23 17:55:21 +00003033 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003034 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003035 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003036 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003039 return(NULL);
3040 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003041 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003042 }
3043 COPY_BUF(l,buffer,len,c);
3044 NEXTL(l);
3045 c = CUR_CHAR(l);
3046 }
3047 buffer[len] = 0;
3048 return(buffer);
3049 }
3050 }
3051 if (len == 0)
3052 return(NULL);
3053 return(xmlStrndup(buf, len));
3054}
3055
3056/**
3057 * xmlParseEntityValue:
3058 * @ctxt: an XML parser context
3059 * @orig: if non-NULL store a copy of the original entity value
3060 *
3061 * parse a value for ENTITY declarations
3062 *
3063 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3064 * "'" ([^%&'] | PEReference | Reference)* "'"
3065 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003066 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003067 */
3068
3069xmlChar *
3070xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3071 xmlChar *buf = NULL;
3072 int len = 0;
3073 int size = XML_PARSER_BUFFER_SIZE;
3074 int c, l;
3075 xmlChar stop;
3076 xmlChar *ret = NULL;
3077 const xmlChar *cur = NULL;
3078 xmlParserInputPtr input;
3079
3080 if (RAW == '"') stop = '"';
3081 else if (RAW == '\'') stop = '\'';
3082 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003083 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003084 return(NULL);
3085 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003086 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003087 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003088 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003089 return(NULL);
3090 }
3091
3092 /*
3093 * The content of the entity definition is copied in a buffer.
3094 */
3095
3096 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3097 input = ctxt->input;
3098 GROW;
3099 NEXT;
3100 c = CUR_CHAR(l);
3101 /*
3102 * NOTE: 4.4.5 Included in Literal
3103 * When a parameter entity reference appears in a literal entity
3104 * value, ... a single or double quote character in the replacement
3105 * text is always treated as a normal data character and will not
3106 * terminate the literal.
3107 * In practice it means we stop the loop only when back at parsing
3108 * the initial entity and the quote is found
3109 */
William M. Brack871611b2003-10-18 04:53:14 +00003110 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003111 (ctxt->input != input))) {
3112 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003113 xmlChar *tmp;
3114
Owen Taylor3473f882001-02-23 17:55:21 +00003115 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003116 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3117 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003118 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003119 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003120 return(NULL);
3121 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003122 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003123 }
3124 COPY_BUF(l,buf,len,c);
3125 NEXTL(l);
3126 /*
3127 * Pop-up of finished entities.
3128 */
3129 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3130 xmlPopInput(ctxt);
3131
3132 GROW;
3133 c = CUR_CHAR(l);
3134 if (c == 0) {
3135 GROW;
3136 c = CUR_CHAR(l);
3137 }
3138 }
3139 buf[len] = 0;
3140
3141 /*
3142 * Raise problem w.r.t. '&' and '%' being used in non-entities
3143 * reference constructs. Note Charref will be handled in
3144 * xmlStringDecodeEntities()
3145 */
3146 cur = buf;
3147 while (*cur != 0) { /* non input consuming */
3148 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3149 xmlChar *name;
3150 xmlChar tmp = *cur;
3151
3152 cur++;
3153 name = xmlParseStringName(ctxt, &cur);
3154 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003155 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003156 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003157 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003158 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003159 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3160 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003161 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003162 }
3163 if (name != NULL)
3164 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003165 if (*cur == 0)
3166 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003167 }
3168 cur++;
3169 }
3170
3171 /*
3172 * Then PEReference entities are substituted.
3173 */
3174 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003175 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003176 xmlFree(buf);
3177 } else {
3178 NEXT;
3179 /*
3180 * NOTE: 4.4.7 Bypassed
3181 * When a general entity reference appears in the EntityValue in
3182 * an entity declaration, it is bypassed and left as is.
3183 * so XML_SUBSTITUTE_REF is not set here.
3184 */
3185 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3186 0, 0, 0);
3187 if (orig != NULL)
3188 *orig = buf;
3189 else
3190 xmlFree(buf);
3191 }
3192
3193 return(ret);
3194}
3195
3196/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003197 * xmlParseAttValueComplex:
3198 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003199 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003200 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003201 *
3202 * parse a value for an attribute, this is the fallback function
3203 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003205 *
3206 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3207 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003208static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003209xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003210 xmlChar limit = 0;
3211 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003212 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003213 int len = 0;
3214 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003215 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003216 xmlChar *current = NULL;
3217 xmlEntityPtr ent;
3218
Owen Taylor3473f882001-02-23 17:55:21 +00003219 if (NXT(0) == '"') {
3220 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3221 limit = '"';
3222 NEXT;
3223 } else if (NXT(0) == '\'') {
3224 limit = '\'';
3225 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3226 NEXT;
3227 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003228 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003229 return(NULL);
3230 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003231
Owen Taylor3473f882001-02-23 17:55:21 +00003232 /*
3233 * allocate a translation buffer.
3234 */
3235 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003236 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003237 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003238
3239 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003240 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003241 */
3242 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003243 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003244 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003245 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003246 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003247 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003248 if (NXT(1) == '#') {
3249 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003250
Owen Taylor3473f882001-02-23 17:55:21 +00003251 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003252 if (ctxt->replaceEntities) {
3253 if (len > buf_size - 10) {
3254 growBuffer(buf);
3255 }
3256 buf[len++] = '&';
3257 } else {
3258 /*
3259 * The reparsing will be done in xmlStringGetNodeList()
3260 * called by the attribute() function in SAX.c
3261 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003262 if (len > buf_size - 10) {
3263 growBuffer(buf);
3264 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003265 buf[len++] = '&';
3266 buf[len++] = '#';
3267 buf[len++] = '3';
3268 buf[len++] = '8';
3269 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003270 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003271 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003272 if (len > buf_size - 10) {
3273 growBuffer(buf);
3274 }
Owen Taylor3473f882001-02-23 17:55:21 +00003275 len += xmlCopyChar(0, &buf[len], val);
3276 }
3277 } else {
3278 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003279 if ((ent != NULL) &&
3280 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3281 if (len > buf_size - 10) {
3282 growBuffer(buf);
3283 }
3284 if ((ctxt->replaceEntities == 0) &&
3285 (ent->content[0] == '&')) {
3286 buf[len++] = '&';
3287 buf[len++] = '#';
3288 buf[len++] = '3';
3289 buf[len++] = '8';
3290 buf[len++] = ';';
3291 } else {
3292 buf[len++] = ent->content[0];
3293 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003294 } else if ((ent != NULL) &&
3295 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003296 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3297 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003298 XML_SUBSTITUTE_REF,
3299 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 if (rep != NULL) {
3301 current = rep;
3302 while (*current != 0) { /* non input consuming */
3303 buf[len++] = *current++;
3304 if (len > buf_size - 10) {
3305 growBuffer(buf);
3306 }
3307 }
3308 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003309 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003310 }
3311 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003312 if (len > buf_size - 10) {
3313 growBuffer(buf);
3314 }
Owen Taylor3473f882001-02-23 17:55:21 +00003315 if (ent->content != NULL)
3316 buf[len++] = ent->content[0];
3317 }
3318 } else if (ent != NULL) {
3319 int i = xmlStrlen(ent->name);
3320 const xmlChar *cur = ent->name;
3321
3322 /*
3323 * This may look absurd but is needed to detect
3324 * entities problems
3325 */
3326 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3327 (ent->content != NULL)) {
3328 xmlChar *rep;
3329 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003330 XML_SUBSTITUTE_REF, 0, 0, 0);
3331 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003332 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003333 rep = NULL;
3334 }
Owen Taylor3473f882001-02-23 17:55:21 +00003335 }
3336
3337 /*
3338 * Just output the reference
3339 */
3340 buf[len++] = '&';
3341 if (len > buf_size - i - 10) {
3342 growBuffer(buf);
3343 }
3344 for (;i > 0;i--)
3345 buf[len++] = *cur++;
3346 buf[len++] = ';';
3347 }
3348 }
3349 } else {
3350 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003351 if ((len != 0) || (!normalize)) {
3352 if ((!normalize) || (!in_space)) {
3353 COPY_BUF(l,buf,len,0x20);
3354 if (len > buf_size - 10) {
3355 growBuffer(buf);
3356 }
3357 }
3358 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003359 }
3360 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003361 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 COPY_BUF(l,buf,len,c);
3363 if (len > buf_size - 10) {
3364 growBuffer(buf);
3365 }
3366 }
3367 NEXTL(l);
3368 }
3369 GROW;
3370 c = CUR_CHAR(l);
3371 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003372 if ((in_space) && (normalize)) {
3373 while (buf[len - 1] == 0x20) len--;
3374 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003375 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003376 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003377 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003378 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003379 if ((c != 0) && (!IS_CHAR(c))) {
3380 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3381 "invalid character in attribute value\n");
3382 } else {
3383 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3384 "AttValue: ' expected\n");
3385 }
Owen Taylor3473f882001-02-23 17:55:21 +00003386 } else
3387 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003388 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003390
3391mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003392 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003393 if (buf != NULL)
3394 xmlFree(buf);
3395 if (rep != NULL)
3396 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003397 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003398}
3399
3400/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003401 * xmlParseAttValue:
3402 * @ctxt: an XML parser context
3403 *
3404 * parse a value for an attribute
3405 * Note: the parser won't do substitution of entities here, this
3406 * will be handled later in xmlStringGetNodeList
3407 *
3408 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3409 * "'" ([^<&'] | Reference)* "'"
3410 *
3411 * 3.3.3 Attribute-Value Normalization:
3412 * Before the value of an attribute is passed to the application or
3413 * checked for validity, the XML processor must normalize it as follows:
3414 * - a character reference is processed by appending the referenced
3415 * character to the attribute value
3416 * - an entity reference is processed by recursively processing the
3417 * replacement text of the entity
3418 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3419 * appending #x20 to the normalized value, except that only a single
3420 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3421 * parsed entity or the literal entity value of an internal parsed entity
3422 * - other characters are processed by appending them to the normalized value
3423 * If the declared value is not CDATA, then the XML processor must further
3424 * process the normalized attribute value by discarding any leading and
3425 * trailing space (#x20) characters, and by replacing sequences of space
3426 * (#x20) characters by a single space (#x20) character.
3427 * All attributes for which no declaration has been read should be treated
3428 * by a non-validating parser as if declared CDATA.
3429 *
3430 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3431 */
3432
3433
3434xmlChar *
3435xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003436 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003437 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003438}
3439
3440/**
Owen Taylor3473f882001-02-23 17:55:21 +00003441 * xmlParseSystemLiteral:
3442 * @ctxt: an XML parser context
3443 *
3444 * parse an XML Literal
3445 *
3446 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3447 *
3448 * Returns the SystemLiteral parsed or NULL
3449 */
3450
3451xmlChar *
3452xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3453 xmlChar *buf = NULL;
3454 int len = 0;
3455 int size = XML_PARSER_BUFFER_SIZE;
3456 int cur, l;
3457 xmlChar stop;
3458 int state = ctxt->instate;
3459 int count = 0;
3460
3461 SHRINK;
3462 if (RAW == '"') {
3463 NEXT;
3464 stop = '"';
3465 } else if (RAW == '\'') {
3466 NEXT;
3467 stop = '\'';
3468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003469 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003470 return(NULL);
3471 }
3472
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003473 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003474 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003475 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003476 return(NULL);
3477 }
3478 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3479 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003480 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003481 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003482 xmlChar *tmp;
3483
Owen Taylor3473f882001-02-23 17:55:21 +00003484 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003485 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3486 if (tmp == NULL) {
3487 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003488 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003489 ctxt->instate = (xmlParserInputState) state;
3490 return(NULL);
3491 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003492 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003493 }
3494 count++;
3495 if (count > 50) {
3496 GROW;
3497 count = 0;
3498 }
3499 COPY_BUF(l,buf,len,cur);
3500 NEXTL(l);
3501 cur = CUR_CHAR(l);
3502 if (cur == 0) {
3503 GROW;
3504 SHRINK;
3505 cur = CUR_CHAR(l);
3506 }
3507 }
3508 buf[len] = 0;
3509 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003510 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003511 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003512 } else {
3513 NEXT;
3514 }
3515 return(buf);
3516}
3517
3518/**
3519 * xmlParsePubidLiteral:
3520 * @ctxt: an XML parser context
3521 *
3522 * parse an XML public literal
3523 *
3524 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3525 *
3526 * Returns the PubidLiteral parsed or NULL.
3527 */
3528
3529xmlChar *
3530xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3531 xmlChar *buf = NULL;
3532 int len = 0;
3533 int size = XML_PARSER_BUFFER_SIZE;
3534 xmlChar cur;
3535 xmlChar stop;
3536 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003537 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003538
3539 SHRINK;
3540 if (RAW == '"') {
3541 NEXT;
3542 stop = '"';
3543 } else if (RAW == '\'') {
3544 NEXT;
3545 stop = '\'';
3546 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003547 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003548 return(NULL);
3549 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003550 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003551 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003552 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003553 return(NULL);
3554 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003555 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003556 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003557 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003558 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003559 xmlChar *tmp;
3560
Owen Taylor3473f882001-02-23 17:55:21 +00003561 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003562 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3563 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003564 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003565 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003566 return(NULL);
3567 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003568 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 buf[len++] = cur;
3571 count++;
3572 if (count > 50) {
3573 GROW;
3574 count = 0;
3575 }
3576 NEXT;
3577 cur = CUR;
3578 if (cur == 0) {
3579 GROW;
3580 SHRINK;
3581 cur = CUR;
3582 }
3583 }
3584 buf[len] = 0;
3585 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003586 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003587 } else {
3588 NEXT;
3589 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003590 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003591 return(buf);
3592}
3593
Daniel Veillard48b2f892001-02-25 16:11:03 +00003594void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003595
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanner: a non-zero entry means the byte can be skipped over without
 * further inspection.
 *
 * Only the first 128 entries are spelled out. The remaining 128
 * (non-ASCII bytes, 0x80 - 0xFF) are implicitly initialized to zero.
 * Zero entries below 0x80: every control character except TAB (0x09),
 * which includes LF and CR, plus '&', '<' and ']'.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
3633
Owen Taylor3473f882001-02-23 17:55:21 +00003634/**
3635 * xmlParseCharData:
3636 * @ctxt: an XML parser context
3637 * @cdata: int indicating whether we are within a CDATA section
3638 *
3639 * parse a CharData section.
3640 * if we are within a CDATA section ']]>' marks an end of section.
3641 *
3642 * The right angle bracket (>) may be represented using the string "&gt;",
3643 * and must, for compatibility, be escaped using "&gt;" or a character
3644 * reference when it appears in the string "]]>" in content, when that
3645 * string is not marking the end of a CDATA section.
3646 *
3647 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3648 */
3649
3650void
3651xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003652 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003653 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003654 int line = ctxt->input->line;
3655 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003656 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003657
3658 SHRINK;
3659 GROW;
3660 /*
3661 * Accelerated common case where input don't need to be
3662 * modified before passing it to the handler.
3663 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003664 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003665 in = ctxt->input->cur;
3666 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003667get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003668 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003669 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003670 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003671 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003672 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003673 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003674 goto get_more_space;
3675 }
3676 if (*in == '<') {
3677 nbchar = in - ctxt->input->cur;
3678 if (nbchar > 0) {
3679 const xmlChar *tmp = ctxt->input->cur;
3680 ctxt->input->cur = in;
3681
Daniel Veillard34099b42004-11-04 17:34:35 +00003682 if ((ctxt->sax != NULL) &&
3683 (ctxt->sax->ignorableWhitespace !=
3684 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003685 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003686 if (ctxt->sax->ignorableWhitespace != NULL)
3687 ctxt->sax->ignorableWhitespace(ctxt->userData,
3688 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003689 } else {
3690 if (ctxt->sax->characters != NULL)
3691 ctxt->sax->characters(ctxt->userData,
3692 tmp, nbchar);
3693 if (*ctxt->space == -1)
3694 *ctxt->space = -2;
3695 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003696 } else if ((ctxt->sax != NULL) &&
3697 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003698 ctxt->sax->characters(ctxt->userData,
3699 tmp, nbchar);
3700 }
3701 }
3702 return;
3703 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003704
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003705get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003706 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003707 while (test_char_data[*in]) {
3708 in++;
3709 ccol++;
3710 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003711 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003712 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003713 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003714 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003715 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003716 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003717 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003718 }
3719 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003720 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003721 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003722 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003723 return;
3724 }
3725 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003726 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003727 goto get_more;
3728 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003729 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003730 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003731 if ((ctxt->sax != NULL) &&
3732 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003733 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003734 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003735 const xmlChar *tmp = ctxt->input->cur;
3736 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003737
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003738 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003739 if (ctxt->sax->ignorableWhitespace != NULL)
3740 ctxt->sax->ignorableWhitespace(ctxt->userData,
3741 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003742 } else {
3743 if (ctxt->sax->characters != NULL)
3744 ctxt->sax->characters(ctxt->userData,
3745 tmp, nbchar);
3746 if (*ctxt->space == -1)
3747 *ctxt->space = -2;
3748 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003749 line = ctxt->input->line;
3750 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003751 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003752 if (ctxt->sax->characters != NULL)
3753 ctxt->sax->characters(ctxt->userData,
3754 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003755 line = ctxt->input->line;
3756 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003757 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003758 }
3759 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003760 if (*in == 0xD) {
3761 in++;
3762 if (*in == 0xA) {
3763 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003764 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003765 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003766 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003767 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003768 in--;
3769 }
3770 if (*in == '<') {
3771 return;
3772 }
3773 if (*in == '&') {
3774 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003775 }
3776 SHRINK;
3777 GROW;
3778 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003779 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003780 nbchar = 0;
3781 }
Daniel Veillard50582112001-03-26 22:52:16 +00003782 ctxt->input->line = line;
3783 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003784 xmlParseCharDataComplex(ctxt, cdata);
3785}
3786
Daniel Veillard01c13b52002-12-10 15:19:08 +00003787/**
3788 * xmlParseCharDataComplex:
3789 * @ctxt: an XML parser context
3790 * @cdata: int indicating whether we are within a CDATA section
3791 *
3792 * parse a CharData section.this is the fallback function
3793 * of xmlParseCharData() when the parsing requires handling
3794 * of non-ASCII characters.
3795 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003796void
3797xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003798 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3799 int nbchar = 0;
3800 int cur, l;
3801 int count = 0;
3802
3803 SHRINK;
3804 GROW;
3805 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003806 while ((cur != '<') && /* checked */
3807 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003808 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003809 if ((cur == ']') && (NXT(1) == ']') &&
3810 (NXT(2) == '>')) {
3811 if (cdata) break;
3812 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003813 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 }
3816 COPY_BUF(l,buf,nbchar,cur);
3817 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003818 buf[nbchar] = 0;
3819
Owen Taylor3473f882001-02-23 17:55:21 +00003820 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003821 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003822 */
3823 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003824 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003825 if (ctxt->sax->ignorableWhitespace != NULL)
3826 ctxt->sax->ignorableWhitespace(ctxt->userData,
3827 buf, nbchar);
3828 } else {
3829 if (ctxt->sax->characters != NULL)
3830 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003831 if ((ctxt->sax->characters !=
3832 ctxt->sax->ignorableWhitespace) &&
3833 (*ctxt->space == -1))
3834 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 }
3836 }
3837 nbchar = 0;
3838 }
3839 count++;
3840 if (count > 50) {
3841 GROW;
3842 count = 0;
3843 }
3844 NEXTL(l);
3845 cur = CUR_CHAR(l);
3846 }
3847 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003848 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003849 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003850 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003851 */
3852 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003853 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003854 if (ctxt->sax->ignorableWhitespace != NULL)
3855 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3856 } else {
3857 if (ctxt->sax->characters != NULL)
3858 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003859 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3860 (*ctxt->space == -1))
3861 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003862 }
3863 }
3864 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003865 if ((cur != 0) && (!IS_CHAR(cur))) {
3866 /* Generate the error and skip the offending character */
3867 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3868 "PCDATA invalid Char value %d\n",
3869 cur);
3870 NEXTL(l);
3871 }
Owen Taylor3473f882001-02-23 17:55:21 +00003872}
3873
3874/**
3875 * xmlParseExternalID:
3876 * @ctxt: an XML parser context
3877 * @publicID: a xmlChar** receiving PubidLiteral
3878 * @strict: indicate whether we should restrict parsing to only
3879 * production [75], see NOTE below
3880 *
3881 * Parse an External ID or a Public ID
3882 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003883 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003884 * 'PUBLIC' S PubidLiteral S SystemLiteral
3885 *
3886 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3887 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3888 *
3889 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3890 *
3891 * Returns the function returns SystemLiteral and in the second
3892 * case publicID receives PubidLiteral, is strict is off
3893 * it is possible to return NULL and have publicID set.
3894 */
3895
3896xmlChar *
3897xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3898 xmlChar *URI = NULL;
3899
3900 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003901
3902 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003903 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003904 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003905 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003906 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3907 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003908 }
3909 SKIP_BLANKS;
3910 URI = xmlParseSystemLiteral(ctxt);
3911 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003912 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003913 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003914 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003915 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003916 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003918 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003919 }
3920 SKIP_BLANKS;
3921 *publicID = xmlParsePubidLiteral(ctxt);
3922 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003923 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003924 }
3925 if (strict) {
3926 /*
3927 * We don't handle [83] so "S SystemLiteral" is required.
3928 */
William M. Brack76e95df2003-10-18 16:20:14 +00003929 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003931 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003932 }
3933 } else {
3934 /*
3935 * We handle [83] so we return immediately, if
3936 * "S SystemLiteral" is not detected. From a purely parsing
3937 * point of view that's a nice mess.
3938 */
3939 const xmlChar *ptr;
3940 GROW;
3941
3942 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003943 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003944
William M. Brack76e95df2003-10-18 16:20:14 +00003945 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003946 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3947 }
3948 SKIP_BLANKS;
3949 URI = xmlParseSystemLiteral(ctxt);
3950 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 }
3953 }
3954 return(URI);
3955}
3956
3957/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003958 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003959 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003960 * @buf: the already parsed part of the buffer
3961 * @len: number of bytes filles in the buffer
3962 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003963 *
3964 * Skip an XML (SGML) comment <!-- .... -->
3965 * The spec says that "For compatibility, the string "--" (double-hyphen)
3966 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003967 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003968 *
3969 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3970 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003971static void
3972xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003973 int q, ql;
3974 int r, rl;
3975 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003976 xmlParserInputPtr input = ctxt->input;
3977 int count = 0;
3978
Owen Taylor3473f882001-02-23 17:55:21 +00003979 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003980 len = 0;
3981 size = XML_PARSER_BUFFER_SIZE;
3982 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3983 if (buf == NULL) {
3984 xmlErrMemory(ctxt, NULL);
3985 return;
3986 }
Owen Taylor3473f882001-02-23 17:55:21 +00003987 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003988 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003989 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003990 if (q == 0)
3991 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003992 if (!IS_CHAR(q)) {
3993 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3994 "xmlParseComment: invalid xmlChar value %d\n",
3995 q);
3996 xmlFree (buf);
3997 return;
3998 }
Owen Taylor3473f882001-02-23 17:55:21 +00003999 NEXTL(ql);
4000 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004001 if (r == 0)
4002 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004003 if (!IS_CHAR(r)) {
4004 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4005 "xmlParseComment: invalid xmlChar value %d\n",
4006 q);
4007 xmlFree (buf);
4008 return;
4009 }
Owen Taylor3473f882001-02-23 17:55:21 +00004010 NEXTL(rl);
4011 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004012 if (cur == 0)
4013 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004014 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004015 ((cur != '>') ||
4016 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004017 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004018 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004019 }
4020 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004021 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004022 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004023 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4024 if (new_buf == NULL) {
4025 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004026 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004027 return;
4028 }
William M. Bracka3215c72004-07-31 16:24:01 +00004029 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 }
4031 COPY_BUF(ql,buf,len,q);
4032 q = r;
4033 ql = rl;
4034 r = cur;
4035 rl = l;
4036
4037 count++;
4038 if (count > 50) {
4039 GROW;
4040 count = 0;
4041 }
4042 NEXTL(l);
4043 cur = CUR_CHAR(l);
4044 if (cur == 0) {
4045 SHRINK;
4046 GROW;
4047 cur = CUR_CHAR(l);
4048 }
4049 }
4050 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004051 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004052 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004053 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004054 } else if (!IS_CHAR(cur)) {
4055 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4056 "xmlParseComment: invalid xmlChar value %d\n",
4057 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004058 } else {
4059 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004060 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4061 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063 NEXT;
4064 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4065 (!ctxt->disableSAX))
4066 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004067 }
Daniel Veillardda629342007-08-01 07:49:06 +00004068 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004069 return;
4070not_terminated:
4071 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4072 "Comment not terminated\n", NULL);
4073 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004074 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004075}
Daniel Veillardda629342007-08-01 07:49:06 +00004076
Daniel Veillard4c778d82005-01-23 17:37:44 +00004077/**
4078 * xmlParseComment:
4079 * @ctxt: an XML parser context
4080 *
4081 * Skip an XML (SGML) comment <!-- .... -->
4082 * The spec says that "For compatibility, the string "--" (double-hyphen)
4083 * must not occur within comments. "
4084 *
4085 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4086 */
4087void
4088xmlParseComment(xmlParserCtxtPtr ctxt) {
4089 xmlChar *buf = NULL;
4090 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004091 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004092 xmlParserInputState state;
4093 const xmlChar *in;
4094 int nbchar = 0, ccol;
4095
4096 /*
4097 * Check that there is a comment right here.
4098 */
4099 if ((RAW != '<') || (NXT(1) != '!') ||
4100 (NXT(2) != '-') || (NXT(3) != '-')) return;
4101
4102 state = ctxt->instate;
4103 ctxt->instate = XML_PARSER_COMMENT;
4104 SKIP(4);
4105 SHRINK;
4106 GROW;
4107
4108 /*
4109 * Accelerated common case where input don't need to be
4110 * modified before passing it to the handler.
4111 */
4112 in = ctxt->input->cur;
4113 do {
4114 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004115 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004116 ctxt->input->line++; ctxt->input->col = 1;
4117 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004118 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004119 }
4120get_more:
4121 ccol = ctxt->input->col;
4122 while (((*in > '-') && (*in <= 0x7F)) ||
4123 ((*in >= 0x20) && (*in < '-')) ||
4124 (*in == 0x09)) {
4125 in++;
4126 ccol++;
4127 }
4128 ctxt->input->col = ccol;
4129 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004130 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004131 ctxt->input->line++; ctxt->input->col = 1;
4132 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004133 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004134 goto get_more;
4135 }
4136 nbchar = in - ctxt->input->cur;
4137 /*
4138 * save current set of data
4139 */
4140 if (nbchar > 0) {
4141 if ((ctxt->sax != NULL) &&
4142 (ctxt->sax->comment != NULL)) {
4143 if (buf == NULL) {
4144 if ((*in == '-') && (in[1] == '-'))
4145 size = nbchar + 1;
4146 else
4147 size = XML_PARSER_BUFFER_SIZE + nbchar;
4148 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4149 if (buf == NULL) {
4150 xmlErrMemory(ctxt, NULL);
4151 ctxt->instate = state;
4152 return;
4153 }
4154 len = 0;
4155 } else if (len + nbchar + 1 >= size) {
4156 xmlChar *new_buf;
4157 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4158 new_buf = (xmlChar *) xmlRealloc(buf,
4159 size * sizeof(xmlChar));
4160 if (new_buf == NULL) {
4161 xmlFree (buf);
4162 xmlErrMemory(ctxt, NULL);
4163 ctxt->instate = state;
4164 return;
4165 }
4166 buf = new_buf;
4167 }
4168 memcpy(&buf[len], ctxt->input->cur, nbchar);
4169 len += nbchar;
4170 buf[len] = 0;
4171 }
4172 }
4173 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004174 if (*in == 0xA) {
4175 in++;
4176 ctxt->input->line++; ctxt->input->col = 1;
4177 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004178 if (*in == 0xD) {
4179 in++;
4180 if (*in == 0xA) {
4181 ctxt->input->cur = in;
4182 in++;
4183 ctxt->input->line++; ctxt->input->col = 1;
4184 continue; /* while */
4185 }
4186 in--;
4187 }
4188 SHRINK;
4189 GROW;
4190 in = ctxt->input->cur;
4191 if (*in == '-') {
4192 if (in[1] == '-') {
4193 if (in[2] == '>') {
4194 SKIP(3);
4195 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4196 (!ctxt->disableSAX)) {
4197 if (buf != NULL)
4198 ctxt->sax->comment(ctxt->userData, buf);
4199 else
4200 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4201 }
4202 if (buf != NULL)
4203 xmlFree(buf);
4204 ctxt->instate = state;
4205 return;
4206 }
4207 if (buf != NULL)
4208 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4209 "Comment not terminated \n<!--%.50s\n",
4210 buf);
4211 else
4212 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4213 "Comment not terminated \n", NULL);
4214 in++;
4215 ctxt->input->col++;
4216 }
4217 in++;
4218 ctxt->input->col++;
4219 goto get_more;
4220 }
4221 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4222 xmlParseCommentComplex(ctxt, buf, len, size);
4223 ctxt->instate = state;
4224 return;
4225}
4226
Owen Taylor3473f882001-02-23 17:55:21 +00004227
4228/**
4229 * xmlParsePITarget:
4230 * @ctxt: an XML parser context
4231 *
4232 * parse the name of a PI
4233 *
4234 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4235 *
4236 * Returns the PITarget name or NULL
4237 */
4238
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004239const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004240xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004241 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004242
4243 name = xmlParseName(ctxt);
4244 if ((name != NULL) &&
4245 ((name[0] == 'x') || (name[0] == 'X')) &&
4246 ((name[1] == 'm') || (name[1] == 'M')) &&
4247 ((name[2] == 'l') || (name[2] == 'L'))) {
4248 int i;
4249 if ((name[0] == 'x') && (name[1] == 'm') &&
4250 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004251 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004252 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004253 return(name);
4254 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004255 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004256 return(name);
4257 }
4258 for (i = 0;;i++) {
4259 if (xmlW3CPIs[i] == NULL) break;
4260 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4261 return(name);
4262 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004263 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4264 "xmlParsePITarget: invalid name prefix 'xml'\n",
4265 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 return(name);
4268}
4269
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be the pseudo attribute catalog, an equal sign and a
 * quoted URL, with nothing but blanks around them. A syntax error raises
 * a warning, except for a missing '=' where the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* S? 'catalog' */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* S? '=' : when missing just give up without any warning */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* S? opening quote, simple or double */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4331
Owen Taylor3473f882001-02-23 17:55:21 +00004332/**
4333 * xmlParsePI:
4334 * @ctxt: an XML parser context
4335 *
4336 * parse an XML Processing Instruction.
4337 *
4338 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4339 *
4340 * The processing is transfered to SAX once parsed.
4341 */
4342
4343void
4344xmlParsePI(xmlParserCtxtPtr ctxt) {
4345 xmlChar *buf = NULL;
4346 int len = 0;
4347 int size = XML_PARSER_BUFFER_SIZE;
4348 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004349 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004350 xmlParserInputState state;
4351 int count = 0;
4352
4353 if ((RAW == '<') && (NXT(1) == '?')) {
4354 xmlParserInputPtr input = ctxt->input;
4355 state = ctxt->instate;
4356 ctxt->instate = XML_PARSER_PI;
4357 /*
4358 * this is a Processing Instruction.
4359 */
4360 SKIP(2);
4361 SHRINK;
4362
4363 /*
4364 * Parse the target name and check for special support like
4365 * namespace.
4366 */
4367 target = xmlParsePITarget(ctxt);
4368 if (target != NULL) {
4369 if ((RAW == '?') && (NXT(1) == '>')) {
4370 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004371 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4372 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004373 }
4374 SKIP(2);
4375
4376 /*
4377 * SAX: PI detected.
4378 */
4379 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4380 (ctxt->sax->processingInstruction != NULL))
4381 ctxt->sax->processingInstruction(ctxt->userData,
4382 target, NULL);
4383 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004384 return;
4385 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004386 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004387 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004388 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004389 ctxt->instate = state;
4390 return;
4391 }
4392 cur = CUR;
4393 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004394 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4395 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004396 }
4397 SKIP_BLANKS;
4398 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004399 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004400 ((cur != '?') || (NXT(1) != '>'))) {
4401 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004402 xmlChar *tmp;
4403
Owen Taylor3473f882001-02-23 17:55:21 +00004404 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004405 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4406 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004407 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004408 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004409 ctxt->instate = state;
4410 return;
4411 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004412 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004413 }
4414 count++;
4415 if (count > 50) {
4416 GROW;
4417 count = 0;
4418 }
4419 COPY_BUF(l,buf,len,cur);
4420 NEXTL(l);
4421 cur = CUR_CHAR(l);
4422 if (cur == 0) {
4423 SHRINK;
4424 GROW;
4425 cur = CUR_CHAR(l);
4426 }
4427 }
4428 buf[len] = 0;
4429 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004430 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4431 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004432 } else {
4433 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004434 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4435 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004436 }
4437 SKIP(2);
4438
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004439#ifdef LIBXML_CATALOG_ENABLED
4440 if (((state == XML_PARSER_MISC) ||
4441 (state == XML_PARSER_START)) &&
4442 (xmlStrEqual(target, XML_CATALOG_PI))) {
4443 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4444 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4445 (allow == XML_CATA_ALLOW_ALL))
4446 xmlParseCatalogPI(ctxt, buf);
4447 }
4448#endif
4449
4450
Owen Taylor3473f882001-02-23 17:55:21 +00004451 /*
4452 * SAX: PI detected.
4453 */
4454 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4455 (ctxt->sax->processingInstruction != NULL))
4456 ctxt->sax->processingInstruction(ctxt->userData,
4457 target, buf);
4458 }
4459 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004460 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004461 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 }
4463 ctxt->instate = state;
4464 }
4465}
4466
4467/**
4468 * xmlParseNotationDecl:
4469 * @ctxt: an XML parser context
4470 *
4471 * parse a notation declaration
4472 *
4473 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4474 *
4475 * Hence there is actually 3 choices:
4476 * 'PUBLIC' S PubidLiteral
4477 * 'PUBLIC' S PubidLiteral S SystemLiteral
4478 * and 'SYSTEM' S SystemLiteral
4479 *
4480 * See the NOTE on xmlParseExternalID().
4481 */
4482
4483void
4484xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004485 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004486 xmlChar *Pubid;
4487 xmlChar *Systemid;
4488
Daniel Veillarda07050d2003-10-19 14:46:32 +00004489 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004490 xmlParserInputPtr input = ctxt->input;
4491 SHRINK;
4492 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004493 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4495 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004496 return;
4497 }
4498 SKIP_BLANKS;
4499
Daniel Veillard76d66f42001-05-16 21:05:17 +00004500 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004501 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004502 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 return;
4504 }
William M. Brack76e95df2003-10-18 16:20:14 +00004505 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004507 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004508 return;
4509 }
4510 SKIP_BLANKS;
4511
4512 /*
4513 * Parse the IDs.
4514 */
4515 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4516 SKIP_BLANKS;
4517
4518 if (RAW == '>') {
4519 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004520 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4521 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004522 }
4523 NEXT;
4524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4525 (ctxt->sax->notationDecl != NULL))
4526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4527 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004528 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004529 }
Owen Taylor3473f882001-02-23 17:55:21 +00004530 if (Systemid != NULL) xmlFree(Systemid);
4531 if (Pubid != NULL) xmlFree(Pubid);
4532 }
4533}
4534
4535/**
4536 * xmlParseEntityDecl:
4537 * @ctxt: an XML parser context
4538 *
4539 * parse <!ENTITY declarations
4540 *
4541 * [70] EntityDecl ::= GEDecl | PEDecl
4542 *
4543 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4544 *
4545 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4546 *
4547 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4548 *
4549 * [74] PEDef ::= EntityValue | ExternalID
4550 *
4551 * [76] NDataDecl ::= S 'NDATA' S Name
4552 *
4553 * [ VC: Notation Declared ]
4554 * The Name must match the declared name of a notation.
4555 */
4556
4557void
4558xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004559 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004560 xmlChar *value = NULL;
4561 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004562 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 int isParameter = 0;
4564 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004565 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004566
Daniel Veillard4c778d82005-01-23 17:37:44 +00004567 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004568 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004569 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SHRINK;
4571 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004572 skipped = SKIP_BLANKS;
4573 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004574 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4575 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004576 }
Owen Taylor3473f882001-02-23 17:55:21 +00004577
4578 if (RAW == '%') {
4579 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004580 skipped = SKIP_BLANKS;
4581 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004582 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4583 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004584 }
Owen Taylor3473f882001-02-23 17:55:21 +00004585 isParameter = 1;
4586 }
4587
Daniel Veillard76d66f42001-05-16 21:05:17 +00004588 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004589 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004590 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4591 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004592 return;
4593 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004594 skipped = SKIP_BLANKS;
4595 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004596 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4597 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004598 }
Owen Taylor3473f882001-02-23 17:55:21 +00004599
Daniel Veillardf5582f12002-06-11 10:08:16 +00004600 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004601 /*
4602 * handle the various case of definitions...
4603 */
4604 if (isParameter) {
4605 if ((RAW == '"') || (RAW == '\'')) {
4606 value = xmlParseEntityValue(ctxt, &orig);
4607 if (value) {
4608 if ((ctxt->sax != NULL) &&
4609 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4610 ctxt->sax->entityDecl(ctxt->userData, name,
4611 XML_INTERNAL_PARAMETER_ENTITY,
4612 NULL, NULL, value);
4613 }
4614 } else {
4615 URI = xmlParseExternalID(ctxt, &literal, 1);
4616 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004617 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004618 }
4619 if (URI) {
4620 xmlURIPtr uri;
4621
4622 uri = xmlParseURI((const char *) URI);
4623 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004624 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4625 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004626 /*
4627 * This really ought to be a well formedness error
4628 * but the XML Core WG decided otherwise c.f. issue
4629 * E26 of the XML erratas.
4630 */
Owen Taylor3473f882001-02-23 17:55:21 +00004631 } else {
4632 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004633 /*
4634 * Okay this is foolish to block those but not
4635 * invalid URIs.
4636 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004637 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004638 } else {
4639 if ((ctxt->sax != NULL) &&
4640 (!ctxt->disableSAX) &&
4641 (ctxt->sax->entityDecl != NULL))
4642 ctxt->sax->entityDecl(ctxt->userData, name,
4643 XML_EXTERNAL_PARAMETER_ENTITY,
4644 literal, URI, NULL);
4645 }
4646 xmlFreeURI(uri);
4647 }
4648 }
4649 }
4650 } else {
4651 if ((RAW == '"') || (RAW == '\'')) {
4652 value = xmlParseEntityValue(ctxt, &orig);
4653 if ((ctxt->sax != NULL) &&
4654 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4655 ctxt->sax->entityDecl(ctxt->userData, name,
4656 XML_INTERNAL_GENERAL_ENTITY,
4657 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004658 /*
4659 * For expat compatibility in SAX mode.
4660 */
4661 if ((ctxt->myDoc == NULL) ||
4662 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4663 if (ctxt->myDoc == NULL) {
4664 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004665 if (ctxt->myDoc == NULL) {
4666 xmlErrMemory(ctxt, "New Doc failed");
4667 return;
4668 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004669 }
4670 if (ctxt->myDoc->intSubset == NULL)
4671 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4672 BAD_CAST "fake", NULL, NULL);
4673
Daniel Veillard1af9a412003-08-20 22:54:39 +00004674 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4675 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004676 }
Owen Taylor3473f882001-02-23 17:55:21 +00004677 } else {
4678 URI = xmlParseExternalID(ctxt, &literal, 1);
4679 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004680 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004681 }
4682 if (URI) {
4683 xmlURIPtr uri;
4684
4685 uri = xmlParseURI((const char *)URI);
4686 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004687 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4688 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004689 /*
4690 * This really ought to be a well formedness error
4691 * but the XML Core WG decided otherwise c.f. issue
4692 * E26 of the XML erratas.
4693 */
Owen Taylor3473f882001-02-23 17:55:21 +00004694 } else {
4695 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004696 /*
4697 * Okay this is foolish to block those but not
4698 * invalid URIs.
4699 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
4702 xmlFreeURI(uri);
4703 }
4704 }
William M. Brack76e95df2003-10-18 16:20:14 +00004705 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4707 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004708 }
4709 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004710 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004711 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004712 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4714 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004715 }
4716 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004717 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004718 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4719 (ctxt->sax->unparsedEntityDecl != NULL))
4720 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4721 literal, URI, ndata);
4722 } else {
4723 if ((ctxt->sax != NULL) &&
4724 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4725 ctxt->sax->entityDecl(ctxt->userData, name,
4726 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4727 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004728 /*
4729 * For expat compatibility in SAX mode.
4730 * assuming the entity replacement was asked for
4731 */
4732 if ((ctxt->replaceEntities != 0) &&
4733 ((ctxt->myDoc == NULL) ||
4734 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4735 if (ctxt->myDoc == NULL) {
4736 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004737 if (ctxt->myDoc == NULL) {
4738 xmlErrMemory(ctxt, "New Doc failed");
4739 return;
4740 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004741 }
4742
4743 if (ctxt->myDoc->intSubset == NULL)
4744 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4745 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004746 xmlSAX2EntityDecl(ctxt, name,
4747 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4748 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004749 }
Owen Taylor3473f882001-02-23 17:55:21 +00004750 }
4751 }
4752 }
4753 SKIP_BLANKS;
4754 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004755 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004756 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004757 } else {
4758 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004759 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4760 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004761 }
4762 NEXT;
4763 }
4764 if (orig != NULL) {
4765 /*
4766 * Ugly mechanism to save the raw entity value.
4767 */
4768 xmlEntityPtr cur = NULL;
4769
4770 if (isParameter) {
4771 if ((ctxt->sax != NULL) &&
4772 (ctxt->sax->getParameterEntity != NULL))
4773 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4774 } else {
4775 if ((ctxt->sax != NULL) &&
4776 (ctxt->sax->getEntity != NULL))
4777 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004778 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004779 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004780 }
Owen Taylor3473f882001-02-23 17:55:21 +00004781 }
4782 if (cur != NULL) {
4783 if (cur->orig != NULL)
4784 xmlFree(orig);
4785 else
4786 cur->orig = orig;
4787 } else
4788 xmlFree(orig);
4789 }
Owen Taylor3473f882001-02-23 17:55:21 +00004790 if (value != NULL) xmlFree(value);
4791 if (URI != NULL) xmlFree(URI);
4792 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 }
4794}
4795
4796/**
4797 * xmlParseDefaultDecl:
4798 * @ctxt: an XML parser context
4799 * @value: Receive a possible fixed default value for the attribute
4800 *
4801 * Parse an attribute default declaration
4802 *
4803 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4804 *
4805 * [ VC: Required Attribute ]
4806 * if the default declaration is the keyword #REQUIRED, then the
4807 * attribute must be specified for all elements of the type in the
4808 * attribute-list declaration.
4809 *
4810 * [ VC: Attribute Default Legal ]
4811 * The declared default value must meet the lexical constraints of
4812 * the declared attribute type c.f. xmlValidateAttributeDecl()
4813 *
4814 * [ VC: Fixed Attribute Default ]
4815 * if an attribute has a default value declared with the #FIXED
4816 * keyword, instances of that attribute must match the default value.
4817 *
4818 * [ WFC: No < in Attribute Values ]
4819 * handled in xmlParseAttValue()
4820 *
4821 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4822 * or XML_ATTRIBUTE_FIXED.
4823 */
4824
4825int
4826xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4827 int val;
4828 xmlChar *ret;
4829
4830 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004831 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004832 SKIP(9);
4833 return(XML_ATTRIBUTE_REQUIRED);
4834 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004835 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP(8);
4837 return(XML_ATTRIBUTE_IMPLIED);
4838 }
4839 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004840 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004841 SKIP(6);
4842 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004843 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4845 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004846 }
4847 SKIP_BLANKS;
4848 }
4849 ret = xmlParseAttValue(ctxt);
4850 ctxt->instate = XML_PARSER_DTD;
4851 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004852 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004853 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004854 } else
4855 *value = ret;
4856 return(val);
4857}
4858
4859/**
4860 * xmlParseNotationType:
4861 * @ctxt: an XML parser context
4862 *
4863 * parse an Notation attribute type.
4864 *
4865 * Note: the leading 'NOTATION' S part has already being parsed...
4866 *
4867 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4868 *
4869 * [ VC: Notation Attributes ]
4870 * Values of this type must match one of the notation names included
4871 * in the declaration; all notation names in the declaration must be declared.
4872 *
4873 * Returns: the notation attribute tree built while parsing
4874 */
4875
4876xmlEnumerationPtr
4877xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004878 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004879 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4880
4881 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004882 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 return(NULL);
4884 }
4885 SHRINK;
4886 do {
4887 NEXT;
4888 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004889 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004890 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004891 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4892 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004893 return(ret);
4894 }
4895 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004896 if (cur == NULL) return(ret);
4897 if (last == NULL) ret = last = cur;
4898 else {
4899 last->next = cur;
4900 last = cur;
4901 }
4902 SKIP_BLANKS;
4903 } while (RAW == '|');
4904 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004905 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004906 if ((last != NULL) && (last != ret))
4907 xmlFreeEnumeration(last);
4908 return(ret);
4909 }
4910 NEXT;
4911 return(ret);
4912}
4913
4914/**
4915 * xmlParseEnumerationType:
4916 * @ctxt: an XML parser context
4917 *
4918 * parse an Enumeration attribute type.
4919 *
4920 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4921 *
4922 * [ VC: Enumeration ]
4923 * Values of this type must match one of the Nmtoken tokens in
4924 * the declaration
4925 *
4926 * Returns: the enumeration attribute tree built while parsing
4927 */
4928
4929xmlEnumerationPtr
4930xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4931 xmlChar *name;
4932 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4933
4934 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004935 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004936 return(NULL);
4937 }
4938 SHRINK;
4939 do {
4940 NEXT;
4941 SKIP_BLANKS;
4942 name = xmlParseNmtoken(ctxt);
4943 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004944 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004945 return(ret);
4946 }
4947 cur = xmlCreateEnumeration(name);
4948 xmlFree(name);
4949 if (cur == NULL) return(ret);
4950 if (last == NULL) ret = last = cur;
4951 else {
4952 last->next = cur;
4953 last = cur;
4954 }
4955 SKIP_BLANKS;
4956 } while (RAW == '|');
4957 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004958 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004959 return(ret);
4960 }
4961 NEXT;
4962 return(ret);
4963}
4964
4965/**
4966 * xmlParseEnumeratedType:
4967 * @ctxt: an XML parser context
4968 * @tree: the enumeration tree built while parsing
4969 *
4970 * parse an Enumerated attribute type.
4971 *
4972 * [57] EnumeratedType ::= NotationType | Enumeration
4973 *
4974 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4975 *
4976 *
4977 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4978 */
4979
4980int
4981xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004982 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004983 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004984 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4986 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004987 return(0);
4988 }
4989 SKIP_BLANKS;
4990 *tree = xmlParseNotationType(ctxt);
4991 if (*tree == NULL) return(0);
4992 return(XML_ATTRIBUTE_NOTATION);
4993 }
4994 *tree = xmlParseEnumerationType(ctxt);
4995 if (*tree == NULL) return(0);
4996 return(XML_ATTRIBUTE_ENUMERATION);
4997}
4998
4999/**
5000 * xmlParseAttributeType:
5001 * @ctxt: an XML parser context
5002 * @tree: the enumeration tree built while parsing
5003 *
5004 * parse the Attribute list def for an element
5005 *
5006 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5007 *
5008 * [55] StringType ::= 'CDATA'
5009 *
5010 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5011 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5012 *
5013 * Validity constraints for attribute values syntax are checked in
5014 * xmlValidateAttributeValue()
5015 *
5016 * [ VC: ID ]
5017 * Values of type ID must match the Name production. A name must not
5018 * appear more than once in an XML document as a value of this type;
5019 * i.e., ID values must uniquely identify the elements which bear them.
5020 *
5021 * [ VC: One ID per Element Type ]
5022 * No element type may have more than one ID attribute specified.
5023 *
5024 * [ VC: ID Attribute Default ]
5025 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5026 *
5027 * [ VC: IDREF ]
5028 * Values of type IDREF must match the Name production, and values
5029 * of type IDREFS must match Names; each IDREF Name must match the value
5030 * of an ID attribute on some element in the XML document; i.e. IDREF
5031 * values must match the value of some ID attribute.
5032 *
5033 * [ VC: Entity Name ]
5034 * Values of type ENTITY must match the Name production, values
5035 * of type ENTITIES must match Names; each Entity Name must match the
5036 * name of an unparsed entity declared in the DTD.
5037 *
5038 * [ VC: Name Token ]
5039 * Values of type NMTOKEN must match the Nmtoken production; values
5040 * of type NMTOKENS must match Nmtokens.
5041 *
5042 * Returns the attribute type
5043 */
5044int
5045xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5046 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005047 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005048 SKIP(5);
5049 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005050 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005051 SKIP(6);
5052 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005053 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005054 SKIP(5);
5055 return(XML_ATTRIBUTE_IDREF);
5056 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5057 SKIP(2);
5058 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005059 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005060 SKIP(6);
5061 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005062 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005063 SKIP(8);
5064 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005065 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005066 SKIP(8);
5067 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005068 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005069 SKIP(7);
5070 return(XML_ATTRIBUTE_NMTOKEN);
5071 }
5072 return(xmlParseEnumeratedType(ctxt, tree));
5073}
5074
5075/**
5076 * xmlParseAttributeListDecl:
5077 * @ctxt: an XML parser context
5078 *
5079 * : parse the Attribute list def for an element
5080 *
5081 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5082 *
5083 * [53] AttDef ::= S Name S AttType S DefaultDecl
5084 *
5085 */
5086void
5087xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005088 const xmlChar *elemName;
5089 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005090 xmlEnumerationPtr tree;
5091
Daniel Veillarda07050d2003-10-19 14:46:32 +00005092 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005093 xmlParserInputPtr input = ctxt->input;
5094
5095 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005096 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005097 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005098 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005099 }
5100 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005101 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5104 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005105 return;
5106 }
5107 SKIP_BLANKS;
5108 GROW;
5109 while (RAW != '>') {
5110 const xmlChar *check = CUR_PTR;
5111 int type;
5112 int def;
5113 xmlChar *defaultValue = NULL;
5114
5115 GROW;
5116 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005117 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005118 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005119 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5120 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005121 break;
5122 }
5123 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005124 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005125 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005126 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005127 break;
5128 }
5129 SKIP_BLANKS;
5130
5131 type = xmlParseAttributeType(ctxt, &tree);
5132 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005133 break;
5134 }
5135
5136 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005137 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5139 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005140 if (tree != NULL)
5141 xmlFreeEnumeration(tree);
5142 break;
5143 }
5144 SKIP_BLANKS;
5145
5146 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5147 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005148 if (defaultValue != NULL)
5149 xmlFree(defaultValue);
5150 if (tree != NULL)
5151 xmlFreeEnumeration(tree);
5152 break;
5153 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005154 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5155 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005156
5157 GROW;
5158 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005159 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005160 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005161 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005162 if (defaultValue != NULL)
5163 xmlFree(defaultValue);
5164 if (tree != NULL)
5165 xmlFreeEnumeration(tree);
5166 break;
5167 }
5168 SKIP_BLANKS;
5169 }
5170 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005171 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5172 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005173 if (defaultValue != NULL)
5174 xmlFree(defaultValue);
5175 if (tree != NULL)
5176 xmlFreeEnumeration(tree);
5177 break;
5178 }
5179 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5180 (ctxt->sax->attributeDecl != NULL))
5181 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5182 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005183 else if (tree != NULL)
5184 xmlFreeEnumeration(tree);
5185
5186 if ((ctxt->sax2) && (defaultValue != NULL) &&
5187 (def != XML_ATTRIBUTE_IMPLIED) &&
5188 (def != XML_ATTRIBUTE_REQUIRED)) {
5189 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5190 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005191 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005192 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5193 }
Owen Taylor3473f882001-02-23 17:55:21 +00005194 if (defaultValue != NULL)
5195 xmlFree(defaultValue);
5196 GROW;
5197 }
5198 if (RAW == '>') {
5199 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005200 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5201 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005202 }
5203 NEXT;
5204 }
Owen Taylor3473f882001-02-23 17:55:21 +00005205 }
5206}
5207
5208/**
5209 * xmlParseElementMixedContentDecl:
5210 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005211 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005212 *
5213 * parse the declaration for a Mixed Element content
5214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5215 *
5216 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5217 * '(' S? '#PCDATA' S? ')'
5218 *
5219 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5220 *
5221 * [ VC: No Duplicate Types ]
5222 * The same name must not appear more than once in a single
5223 * mixed-content declaration.
5224 *
5225 * returns: the list of the xmlElementContentPtr describing the element choices
5226 */
5227xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005228xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005229 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005230 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005231
5232 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005233 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005234 SKIP(7);
5235 SKIP_BLANKS;
5236 SHRINK;
5237 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005238 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005239 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5240"Element content declaration doesn't start and stop in the same entity\n",
5241 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005242 }
Owen Taylor3473f882001-02-23 17:55:21 +00005243 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005245 if (ret == NULL)
5246 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 if (RAW == '*') {
5248 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5249 NEXT;
5250 }
5251 return(ret);
5252 }
5253 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 if (ret == NULL) return(NULL);
5256 }
5257 while (RAW == '|') {
5258 NEXT;
5259 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005260 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005261 if (ret == NULL) return(NULL);
5262 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005263 if (cur != NULL)
5264 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005265 cur = ret;
5266 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005267 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005269 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005270 if (n->c1 != NULL)
5271 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005272 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005273 if (n != NULL)
5274 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005275 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005276 }
5277 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005278 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005279 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005280 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005281 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005282 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005283 return(NULL);
5284 }
5285 SKIP_BLANKS;
5286 GROW;
5287 }
5288 if ((RAW == ')') && (NXT(1) == '*')) {
5289 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005290 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005291 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005292 if (cur->c2 != NULL)
5293 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005294 }
5295 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005296 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005297 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5298"Element content declaration doesn't start and stop in the same entity\n",
5299 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005300 }
Owen Taylor3473f882001-02-23 17:55:21 +00005301 SKIP(2);
5302 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005303 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005304 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005305 return(NULL);
5306 }
5307
5308 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005309 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 }
5311 return(ret);
5312}
5313
5314/**
5315 * xmlParseElementChildrenContentDecl:
5316 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005317 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005318 *
5319 * parse the declaration for an element-only (children) content model
5320 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5321 *
5322 *
5323 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5324 *
5325 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5326 *
5327 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5328 *
5329 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5330 *
5331 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5332 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005333 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005334 * opening or closing parentheses in a choice, seq, or Mixed
5335 * construct is contained in the replacement text for a parameter
5336 * entity, both must be contained in the same replacement text. For
5337 * interoperability, if a parameter-entity reference appears in a
5338 * choice, seq, or Mixed construct, its replacement text should not
5339 * be empty, and neither the first nor last non-blank character of
5340 * the replacement text should be a connector (| or ,).
5341 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005342 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005343 * hierarchy.
5344 */
5345xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005346xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005347 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005348 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005349 xmlChar type = 0;
5350
5351 SKIP_BLANKS;
5352 GROW;
5353 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005354 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005355
Owen Taylor3473f882001-02-23 17:55:21 +00005356 /* Recurse on first child */
5357 NEXT;
5358 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005359 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 SKIP_BLANKS;
5361 GROW;
5362 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005363 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005364 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005366 return(NULL);
5367 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005368 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005369 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005370 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005371 return(NULL);
5372 }
Owen Taylor3473f882001-02-23 17:55:21 +00005373 GROW;
5374 if (RAW == '?') {
5375 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5376 NEXT;
5377 } else if (RAW == '*') {
5378 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5379 NEXT;
5380 } else if (RAW == '+') {
5381 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5382 NEXT;
5383 } else {
5384 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5385 }
Owen Taylor3473f882001-02-23 17:55:21 +00005386 GROW;
5387 }
5388 SKIP_BLANKS;
5389 SHRINK;
5390 while (RAW != ')') {
5391 /*
5392 * Each loop we parse one separator and one element.
5393 */
5394 if (RAW == ',') {
5395 if (type == 0) type = CUR;
5396
5397 /*
5398 * Detect "Name | Name , Name" error
5399 */
5400 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005401 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005402 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005403 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005404 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005405 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005407 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005408 return(NULL);
5409 }
5410 NEXT;
5411
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005412 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005414 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005415 xmlFreeDocElementContent(ctxt->myDoc, last);
5416 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 return(NULL);
5418 }
5419 if (last == NULL) {
5420 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005421 if (ret != NULL)
5422 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005423 ret = cur = op;
5424 } else {
5425 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005426 if (op != NULL)
5427 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005429 if (last != NULL)
5430 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005431 cur =op;
5432 last = NULL;
5433 }
5434 } else if (RAW == '|') {
5435 if (type == 0) type = CUR;
5436
5437 /*
5438 * Detect "Name , Name | Name" error
5439 */
5440 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005441 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005442 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005443 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005444 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005445 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005446 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005447 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005448 return(NULL);
5449 }
5450 NEXT;
5451
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005452 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005454 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005455 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005457 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005458 return(NULL);
5459 }
5460 if (last == NULL) {
5461 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005462 if (ret != NULL)
5463 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005464 ret = cur = op;
5465 } else {
5466 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005467 if (op != NULL)
5468 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005469 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005470 if (last != NULL)
5471 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 cur =op;
5473 last = NULL;
5474 }
5475 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005476 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005477 if ((last != NULL) && (last != ret))
5478 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005479 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005480 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005481 return(NULL);
5482 }
5483 GROW;
5484 SKIP_BLANKS;
5485 GROW;
5486 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005488 /* Recurse on second child */
5489 NEXT;
5490 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005491 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005492 SKIP_BLANKS;
5493 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005494 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005495 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005496 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005497 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005498 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005499 return(NULL);
5500 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005501 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005502 if (last == NULL) {
5503 if (ret != NULL)
5504 xmlFreeDocElementContent(ctxt->myDoc, ret);
5505 return(NULL);
5506 }
Owen Taylor3473f882001-02-23 17:55:21 +00005507 if (RAW == '?') {
5508 last->ocur = XML_ELEMENT_CONTENT_OPT;
5509 NEXT;
5510 } else if (RAW == '*') {
5511 last->ocur = XML_ELEMENT_CONTENT_MULT;
5512 NEXT;
5513 } else if (RAW == '+') {
5514 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5515 NEXT;
5516 } else {
5517 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5518 }
5519 }
5520 SKIP_BLANKS;
5521 GROW;
5522 }
5523 if ((cur != NULL) && (last != NULL)) {
5524 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005525 if (last != NULL)
5526 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005527 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005528 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005529 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5530"Element content declaration doesn't start and stop in the same entity\n",
5531 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005532 }
Owen Taylor3473f882001-02-23 17:55:21 +00005533 NEXT;
5534 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005535 if (ret != NULL) {
5536 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5537 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5538 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5539 else
5540 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5541 }
Owen Taylor3473f882001-02-23 17:55:21 +00005542 NEXT;
5543 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005544 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005545 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005546 cur = ret;
5547 /*
5548 * Some normalization:
5549 * (a | b* | c?)* == (a | b | c)*
5550 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005551 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005552 if ((cur->c1 != NULL) &&
5553 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5554 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5555 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5556 if ((cur->c2 != NULL) &&
5557 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5558 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5559 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5560 cur = cur->c2;
5561 }
5562 }
Owen Taylor3473f882001-02-23 17:55:21 +00005563 NEXT;
5564 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005565 if (ret != NULL) {
5566 int found = 0;
5567
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005568 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5569 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5570 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005571 else
5572 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005573 /*
5574 * Some normalization:
5575 * (a | b*)+ == (a | b)*
5576 * (a | b?)+ == (a | b)*
5577 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005578 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005579 if ((cur->c1 != NULL) &&
5580 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5581 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5582 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5583 found = 1;
5584 }
5585 if ((cur->c2 != NULL) &&
5586 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5587 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5588 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5589 found = 1;
5590 }
5591 cur = cur->c2;
5592 }
5593 if (found)
5594 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5595 }
Owen Taylor3473f882001-02-23 17:55:21 +00005596 NEXT;
5597 }
5598 return(ret);
5599}
5600
5601/**
5602 * xmlParseElementContentDecl:
5603 * @ctxt: an XML parser context
5604 * @name: the name of the element being defined.
5605 * @result: the Element Content pointer will be stored here if any
5606 *
5607 * parse the declaration for an Element content either Mixed or Children,
5608 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5609 *
5610 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5611 *
5612 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5613 */
5614
5615int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005616xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005617 xmlElementContentPtr *result) {
5618
5619 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005620 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005621 int res;
5622
5623 *result = NULL;
5624
5625 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005626 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005627 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005628 return(-1);
5629 }
5630 NEXT;
5631 GROW;
5632 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005633 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005634 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005635 res = XML_ELEMENT_TYPE_MIXED;
5636 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005637 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005638 res = XML_ELEMENT_TYPE_ELEMENT;
5639 }
Owen Taylor3473f882001-02-23 17:55:21 +00005640 SKIP_BLANKS;
5641 *result = tree;
5642 return(res);
5643}
5644
5645/**
5646 * xmlParseElementDecl:
5647 * @ctxt: an XML parser context
5648 *
5649 * parse an Element declaration.
5650 *
5651 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5652 *
5653 * [ VC: Unique Element Type Declaration ]
5654 * No element type may be declared more than once
5655 *
5656 * Returns the type of the element, or -1 in case of error
5657 */
5658int
5659xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005660 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005661 int ret = -1;
5662 xmlElementContentPtr content = NULL;
5663
Daniel Veillard4c778d82005-01-23 17:37:44 +00005664 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005665 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005666 xmlParserInputPtr input = ctxt->input;
5667
5668 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005669 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005670 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5671 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005672 }
5673 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005674 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005675 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005676 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5677 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005678 return(-1);
5679 }
5680 while ((RAW == 0) && (ctxt->inputNr > 1))
5681 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005682 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5684 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005685 }
5686 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005687 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005688 SKIP(5);
5689 /*
5690 * Element must always be empty.
5691 */
5692 ret = XML_ELEMENT_TYPE_EMPTY;
5693 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5694 (NXT(2) == 'Y')) {
5695 SKIP(3);
5696 /*
5697 * Element is a generic container.
5698 */
5699 ret = XML_ELEMENT_TYPE_ANY;
5700 } else if (RAW == '(') {
5701 ret = xmlParseElementContentDecl(ctxt, name, &content);
5702 } else {
5703 /*
5704 * [ WFC: PEs in Internal Subset ] error handling.
5705 */
5706 if ((RAW == '%') && (ctxt->external == 0) &&
5707 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005708 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005709 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005710 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005711 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005712 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5713 }
Owen Taylor3473f882001-02-23 17:55:21 +00005714 return(-1);
5715 }
5716
5717 SKIP_BLANKS;
5718 /*
5719 * Pop-up of finished entities.
5720 */
5721 while ((RAW == 0) && (ctxt->inputNr > 1))
5722 xmlPopInput(ctxt);
5723 SKIP_BLANKS;
5724
5725 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005726 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005727 if (content != NULL) {
5728 xmlFreeDocElementContent(ctxt->myDoc, content);
5729 }
Owen Taylor3473f882001-02-23 17:55:21 +00005730 } else {
5731 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005732 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5733 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735
5736 NEXT;
5737 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005738 (ctxt->sax->elementDecl != NULL)) {
5739 if (content != NULL)
5740 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005741 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5742 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005743 if ((content != NULL) && (content->parent == NULL)) {
5744 /*
5745 * this is a trick: if xmlAddElementDecl is called,
5746 * instead of copying the full tree it is plugged directly
5747 * if called from the parser. Avoid duplicating the
5748 * interfaces or change the API/ABI
5749 */
5750 xmlFreeDocElementContent(ctxt->myDoc, content);
5751 }
5752 } else if (content != NULL) {
5753 xmlFreeDocElementContent(ctxt->myDoc, content);
5754 }
Owen Taylor3473f882001-02-23 17:55:21 +00005755 }
Owen Taylor3473f882001-02-23 17:55:21 +00005756 }
5757 return(ret);
5758}
5759
5760/**
Owen Taylor3473f882001-02-23 17:55:21 +00005761 * xmlParseConditionalSections
5762 * @ctxt: an XML parser context
5763 *
5764 * [61] conditionalSect ::= includeSect | ignoreSect
5765 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5766 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5767 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5768 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5769 */
5770
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005771static void
Owen Taylor3473f882001-02-23 17:55:21 +00005772xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5773 SKIP(3);
5774 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005775 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005776 SKIP(7);
5777 SKIP_BLANKS;
5778 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005779 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005780 } else {
5781 NEXT;
5782 }
5783 if (xmlParserDebugEntities) {
5784 if ((ctxt->input != NULL) && (ctxt->input->filename))
5785 xmlGenericError(xmlGenericErrorContext,
5786 "%s(%d): ", ctxt->input->filename,
5787 ctxt->input->line);
5788 xmlGenericError(xmlGenericErrorContext,
5789 "Entering INCLUDE Conditional Section\n");
5790 }
5791
5792 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5793 (NXT(2) != '>'))) {
5794 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005795 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005796
5797 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5798 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005799 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005800 NEXT;
5801 } else if (RAW == '%') {
5802 xmlParsePEReference(ctxt);
5803 } else
5804 xmlParseMarkupDecl(ctxt);
5805
5806 /*
5807 * Pop-up of finished entities.
5808 */
5809 while ((RAW == 0) && (ctxt->inputNr > 1))
5810 xmlPopInput(ctxt);
5811
Daniel Veillardfdc91562002-07-01 21:52:03 +00005812 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005813 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005814 break;
5815 }
5816 }
5817 if (xmlParserDebugEntities) {
5818 if ((ctxt->input != NULL) && (ctxt->input->filename))
5819 xmlGenericError(xmlGenericErrorContext,
5820 "%s(%d): ", ctxt->input->filename,
5821 ctxt->input->line);
5822 xmlGenericError(xmlGenericErrorContext,
5823 "Leaving INCLUDE Conditional Section\n");
5824 }
5825
Daniel Veillarda07050d2003-10-19 14:46:32 +00005826 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005827 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005828 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005829 int depth = 0;
5830
5831 SKIP(6);
5832 SKIP_BLANKS;
5833 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005834 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005835 } else {
5836 NEXT;
5837 }
5838 if (xmlParserDebugEntities) {
5839 if ((ctxt->input != NULL) && (ctxt->input->filename))
5840 xmlGenericError(xmlGenericErrorContext,
5841 "%s(%d): ", ctxt->input->filename,
5842 ctxt->input->line);
5843 xmlGenericError(xmlGenericErrorContext,
5844 "Entering IGNORE Conditional Section\n");
5845 }
5846
5847 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005848 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005849 * But disable SAX event generating DTD building in the meantime
5850 */
5851 state = ctxt->disableSAX;
5852 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005853 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005854 ctxt->instate = XML_PARSER_IGNORE;
5855
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005856 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005857 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5858 depth++;
5859 SKIP(3);
5860 continue;
5861 }
5862 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5863 if (--depth >= 0) SKIP(3);
5864 continue;
5865 }
5866 NEXT;
5867 continue;
5868 }
5869
5870 ctxt->disableSAX = state;
5871 ctxt->instate = instate;
5872
5873 if (xmlParserDebugEntities) {
5874 if ((ctxt->input != NULL) && (ctxt->input->filename))
5875 xmlGenericError(xmlGenericErrorContext,
5876 "%s(%d): ", ctxt->input->filename,
5877 ctxt->input->line);
5878 xmlGenericError(xmlGenericErrorContext,
5879 "Leaving IGNORE Conditional Section\n");
5880 }
5881
5882 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005883 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005884 }
5885
5886 if (RAW == 0)
5887 SHRINK;
5888
5889 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005890 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 } else {
5892 SKIP(3);
5893 }
5894}
5895
5896/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005897 * xmlParseMarkupDecl:
5898 * @ctxt: an XML parser context
5899 *
5900 * parse Markup declarations
5901 *
5902 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5903 * NotationDecl | PI | Comment
5904 *
5905 * [ VC: Proper Declaration/PE Nesting ]
5906 * Parameter-entity replacement text must be properly nested with
5907 * markup declarations. That is to say, if either the first character
5908 * or the last character of a markup declaration (markupdecl above) is
5909 * contained in the replacement text for a parameter-entity reference,
5910 * both must be contained in the same replacement text.
5911 *
5912 * [ WFC: PEs in Internal Subset ]
5913 * In the internal DTD subset, parameter-entity references can occur
5914 * only where markup declarations can occur, not within markup declarations.
5915 * (This does not apply to references that occur in external parameter
5916 * entities or to the external subset.)
5917 */
5918void
5919xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5920 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005921 if (CUR == '<') {
5922 if (NXT(1) == '!') {
5923 switch (NXT(2)) {
5924 case 'E':
5925 if (NXT(3) == 'L')
5926 xmlParseElementDecl(ctxt);
5927 else if (NXT(3) == 'N')
5928 xmlParseEntityDecl(ctxt);
5929 break;
5930 case 'A':
5931 xmlParseAttributeListDecl(ctxt);
5932 break;
5933 case 'N':
5934 xmlParseNotationDecl(ctxt);
5935 break;
5936 case '-':
5937 xmlParseComment(ctxt);
5938 break;
5939 default:
5940 /* there is an error but it will be detected later */
5941 break;
5942 }
5943 } else if (NXT(1) == '?') {
5944 xmlParsePI(ctxt);
5945 }
5946 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005947 /*
5948 * This is only for internal subset. On external entities,
5949 * the replacement is done before parsing stage
5950 */
5951 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5952 xmlParsePEReference(ctxt);
5953
5954 /*
5955 * Conditional sections are allowed from entities included
5956 * by PE References in the internal subset.
5957 */
5958 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5959 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5960 xmlParseConditionalSections(ctxt);
5961 }
5962 }
5963
5964 ctxt->instate = XML_PARSER_DTD;
5965}
5966
5967/**
5968 * xmlParseTextDecl:
5969 * @ctxt: an XML parser context
5970 *
5971 * parse an XML declaration header for external entities
5972 *
5973 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5974 *
5975 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5976 */
5977
5978void
5979xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5980 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005981 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005982
5983 /*
5984 * We know that '<?xml' is here.
5985 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005986 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005987 SKIP(5);
5988 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005989 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005990 return;
5991 }
5992
William M. Brack76e95df2003-10-18 16:20:14 +00005993 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5995 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005996 }
5997 SKIP_BLANKS;
5998
5999 /*
6000 * We may have the VersionInfo here.
6001 */
6002 version = xmlParseVersionInfo(ctxt);
6003 if (version == NULL)
6004 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006005 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006006 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6008 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006009 }
6010 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006011 ctxt->input->version = version;
6012
6013 /*
6014 * We must have the encoding declaration
6015 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006016 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006017 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6018 /*
6019 * The XML REC instructs us to stop parsing right here
6020 */
6021 return;
6022 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006023 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6024 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6025 "Missing encoding in text declaration\n");
6026 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006027
6028 SKIP_BLANKS;
6029 if ((RAW == '?') && (NXT(1) == '>')) {
6030 SKIP(2);
6031 } else if (RAW == '>') {
6032 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006033 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006034 NEXT;
6035 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006036 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006037 MOVETO_ENDTAG(CUR_PTR);
6038 NEXT;
6039 }
6040}
6041
6042/**
Owen Taylor3473f882001-02-23 17:55:21 +00006043 * xmlParseExternalSubset:
6044 * @ctxt: an XML parser context
6045 * @ExternalID: the external identifier
6046 * @SystemID: the system identifier (or URL)
6047 *
6048 * parse Markup declarations from an external subset
6049 *
6050 * [30] extSubset ::= textDecl? extSubsetDecl
6051 *
6052 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6053 */
6054void
6055xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6056 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006057 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006058 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006059 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006060 xmlParseTextDecl(ctxt);
6061 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6062 /*
6063 * The XML REC instructs us to stop parsing right here
6064 */
6065 ctxt->instate = XML_PARSER_EOF;
6066 return;
6067 }
6068 }
6069 if (ctxt->myDoc == NULL) {
6070 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6071 }
6072 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6073 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6074
6075 ctxt->instate = XML_PARSER_DTD;
6076 ctxt->external = 1;
6077 while (((RAW == '<') && (NXT(1) == '?')) ||
6078 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006079 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006080 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006081 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006082
6083 GROW;
6084 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6085 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006086 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006087 NEXT;
6088 } else if (RAW == '%') {
6089 xmlParsePEReference(ctxt);
6090 } else
6091 xmlParseMarkupDecl(ctxt);
6092
6093 /*
6094 * Pop-up of finished entities.
6095 */
6096 while ((RAW == 0) && (ctxt->inputNr > 1))
6097 xmlPopInput(ctxt);
6098
Daniel Veillardfdc91562002-07-01 21:52:03 +00006099 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006100 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 break;
6102 }
6103 }
6104
6105 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006106 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006107 }
6108
6109}
6110
6111/**
6112 * xmlParseReference:
6113 * @ctxt: an XML parser context
6114 *
6115 * parse and handle entity references in content, depending on the SAX
6116 * interface, this may end-up in a call to character() if this is a
6117 * CharRef, a predefined entity, if there is no reference() callback.
6118 * or if the parser was asked to switch to that mode.
6119 *
6120 * [67] Reference ::= EntityRef | CharRef
6121 */
6122void
6123xmlParseReference(xmlParserCtxtPtr ctxt) {
6124 xmlEntityPtr ent;
6125 xmlChar *val;
6126 if (RAW != '&') return;
6127
6128 if (NXT(1) == '#') {
6129 int i = 0;
6130 xmlChar out[10];
6131 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006132 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006133
Daniel Veillarddc171602008-03-26 17:41:38 +00006134 if (value == 0)
6135 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006136 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6137 /*
6138 * So we are using non-UTF-8 buffers
6139 * Check that the char fit on 8bits, if not
6140 * generate a CharRef.
6141 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006142 if (value <= 0xFF) {
6143 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006144 out[1] = 0;
6145 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6146 (!ctxt->disableSAX))
6147 ctxt->sax->characters(ctxt->userData, out, 1);
6148 } else {
6149 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006150 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006151 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006152 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6154 (!ctxt->disableSAX))
6155 ctxt->sax->reference(ctxt->userData, out);
6156 }
6157 } else {
6158 /*
6159 * Just encode the value in UTF-8
6160 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006161 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006162 out[i] = 0;
6163 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6164 (!ctxt->disableSAX))
6165 ctxt->sax->characters(ctxt->userData, out, i);
6166 }
6167 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006168 int was_checked;
6169
Owen Taylor3473f882001-02-23 17:55:21 +00006170 ent = xmlParseEntityRef(ctxt);
6171 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006172 if (!ctxt->wellFormed)
6173 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006174 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006175 if ((ent->name != NULL) &&
6176 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6177 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006178 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006179
6180
6181 /*
6182 * The first reference to the entity trigger a parsing phase
6183 * where the ent->children is filled with the result from
6184 * the parsing.
6185 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006186 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006187 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006188
Owen Taylor3473f882001-02-23 17:55:21 +00006189 value = ent->content;
6190
6191 /*
6192 * Check that this entity is well formed
6193 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006194 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006195 (value[1] == 0) && (value[0] == '<') &&
6196 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6197 /*
6198 * DONE: get definite answer on this !!!
6199 * Lots of entity decls are used to declare a single
6200 * char
6201 * <!ENTITY lt "<">
6202 * Which seems to be valid since
6203 * 2.4: The ampersand character (&) and the left angle
6204 * bracket (<) may appear in their literal form only
6205 * when used ... They are also legal within the literal
6206 * entity value of an internal entity declaration;i
6207 * see "4.3.2 Well-Formed Parsed Entities".
6208 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6209 * Looking at the OASIS test suite and James Clark
6210 * tests, this is broken. However the XML REC uses
6211 * it. Is the XML REC not well-formed ????
6212 * This is a hack to avoid this problem
6213 *
6214 * ANSWER: since lt gt amp .. are already defined,
6215 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006216 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006217 * is lousy but acceptable.
6218 */
6219 list = xmlNewDocText(ctxt->myDoc, value);
6220 if (list != NULL) {
6221 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6222 (ent->children == NULL)) {
6223 ent->children = list;
6224 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006225 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006226 list->parent = (xmlNodePtr) ent;
6227 } else {
6228 xmlFreeNodeList(list);
6229 }
6230 } else if (list != NULL) {
6231 xmlFreeNodeList(list);
6232 }
6233 } else {
6234 /*
6235 * 4.3.2: An internal general parsed entity is well-formed
6236 * if its replacement text matches the production labeled
6237 * content.
6238 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006239
6240 void *user_data;
6241 /*
6242 * This is a bit hackish but this seems the best
6243 * way to make sure both SAX and DOM entity support
6244 * behaves okay.
6245 */
6246 if (ctxt->userData == ctxt)
6247 user_data = NULL;
6248 else
6249 user_data = ctxt->userData;
6250
Owen Taylor3473f882001-02-23 17:55:21 +00006251 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6252 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006253 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6254 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006255 ctxt->depth--;
6256 } else if (ent->etype ==
6257 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6258 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006259 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006260 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006261 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006262 ctxt->depth--;
6263 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006264 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006265 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6266 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006267 }
6268 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006270 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006271 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006272 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6273 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006274 (ent->children == NULL)) {
6275 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 if (ctxt->replaceEntities) {
6277 /*
6278 * Prune it directly in the generated document
6279 * except for single text nodes.
6280 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006281 if (((list->type == XML_TEXT_NODE) &&
6282 (list->next == NULL)) ||
6283 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006284 list->parent = (xmlNodePtr) ent;
6285 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006286 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006287 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006288 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006289 while (list != NULL) {
6290 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006291 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006292 if (list->next == NULL)
6293 ent->last = list;
6294 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006295 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006296 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006297#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006298 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6299 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006300#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006301 }
6302 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006303 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006304 while (list != NULL) {
6305 list->parent = (xmlNodePtr) ent;
6306 if (list->next == NULL)
6307 ent->last = list;
6308 list = list->next;
6309 }
Owen Taylor3473f882001-02-23 17:55:21 +00006310 }
6311 } else {
6312 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006313 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006314 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006315 } else if ((ret != XML_ERR_OK) &&
6316 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006317 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6318 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006319 } else if (list != NULL) {
6320 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006321 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006322 }
6323 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006324 ent->checked = 1;
6325 }
6326
6327 if (ent->children == NULL) {
6328 /*
6329 * Probably running in SAX mode and the callbacks don't
6330 * build the entity content. So unless we already went
6331 * though parsing for first checking go though the entity
6332 * content to generate callbacks associated to the entity
6333 */
6334 if (was_checked == 1) {
6335 void *user_data;
6336 /*
6337 * This is a bit hackish but this seems the best
6338 * way to make sure both SAX and DOM entity support
6339 * behaves okay.
6340 */
6341 if (ctxt->userData == ctxt)
6342 user_data = NULL;
6343 else
6344 user_data = ctxt->userData;
6345
6346 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6347 ctxt->depth++;
6348 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6349 ent->content, user_data, NULL);
6350 ctxt->depth--;
6351 } else if (ent->etype ==
6352 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6353 ctxt->depth++;
6354 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6355 ctxt->sax, user_data, ctxt->depth,
6356 ent->URI, ent->ExternalID, NULL);
6357 ctxt->depth--;
6358 } else {
6359 ret = XML_ERR_ENTITY_PE_INTERNAL;
6360 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6361 "invalid entity type found\n", NULL);
6362 }
6363 if (ret == XML_ERR_ENTITY_LOOP) {
6364 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6365 return;
6366 }
6367 }
6368 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6369 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6370 /*
6371 * Entity reference callback comes second, it's somewhat
6372 * superfluous but a compatibility to historical behaviour
6373 */
6374 ctxt->sax->reference(ctxt->userData, ent->name);
6375 }
6376 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006377 }
6378 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006379 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006380 /*
6381 * Create a node.
6382 */
6383 ctxt->sax->reference(ctxt->userData, ent->name);
6384 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006385 }
6386 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006387 /*
6388 * There is a problem on the handling of _private for entities
6389 * (bug 155816): Should we copy the content of the field from
6390 * the entity (possibly overwriting some value set by the user
6391 * when a copy is created), should we leave it alone, or should
6392 * we try to take care of different situations? The problem
6393 * is exacerbated by the usage of this field by the xmlReader.
6394 * To fix this bug, we look at _private on the created node
6395 * and, if it's NULL, we copy in whatever was in the entity.
6396 * If it's not NULL we leave it alone. This is somewhat of a
6397 * hack - maybe we should have further tests to determine
6398 * what to do.
6399 */
Owen Taylor3473f882001-02-23 17:55:21 +00006400 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6401 /*
6402 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006403 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006404 * In the first occurrence list contains the replacement.
6405 * progressive == 2 means we are operating on the Reader
6406 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006407 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006408 if (((list == NULL) && (ent->owner == 0)) ||
6409 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006410 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006411
6412 /*
6413 * when operating on a reader, the entities definitions
6414 * are always owning the entities subtree.
6415 if (ctxt->parseMode == XML_PARSE_READER)
6416 ent->owner = 1;
6417 */
6418
Daniel Veillard62f313b2001-07-04 19:49:14 +00006419 cur = ent->children;
6420 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006421 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006422 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006423 if (nw->_private == NULL)
6424 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006425 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006426 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006427 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006428 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006429 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006430 if (cur == ent->last) {
6431 /*
6432 * needed to detect some strange empty
6433 * node cases in the reader tests
6434 */
6435 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006436 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006437 (nw->type == XML_ELEMENT_NODE) &&
6438 (nw->children == NULL))
6439 nw->extra = 1;
6440
Daniel Veillard62f313b2001-07-04 19:49:14 +00006441 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006442 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006443 cur = cur->next;
6444 }
Daniel Veillard81273902003-09-30 00:43:48 +00006445#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006446 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006447 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006448#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006449 } else if (list == NULL) {
6450 xmlNodePtr nw = NULL, cur, next, last,
6451 firstChild = NULL;
6452 /*
6453 * Copy the entity child list and make it the new
6454 * entity child list. The goal is to make sure any
6455 * ID or REF referenced will be the one from the
6456 * document content and not the entity copy.
6457 */
6458 cur = ent->children;
6459 ent->children = NULL;
6460 last = ent->last;
6461 ent->last = NULL;
6462 while (cur != NULL) {
6463 next = cur->next;
6464 cur->next = NULL;
6465 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006466 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006467 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006468 if (nw->_private == NULL)
6469 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006470 if (firstChild == NULL){
6471 firstChild = cur;
6472 }
6473 xmlAddChild((xmlNodePtr) ent, nw);
6474 xmlAddChild(ctxt->node, cur);
6475 }
6476 if (cur == last)
6477 break;
6478 cur = next;
6479 }
6480 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006481#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006482 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6483 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006484#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006485 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006486 const xmlChar *nbktext;
6487
Daniel Veillard62f313b2001-07-04 19:49:14 +00006488 /*
6489 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006490 * node with a possible previous text one which
6491 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006492 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006493 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6494 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006495 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006496 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006497 if ((ent->last != ent->children) &&
6498 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006499 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006500 xmlAddChildList(ctxt->node, ent->children);
6501 }
6502
Owen Taylor3473f882001-02-23 17:55:21 +00006503 /*
6504 * This is to avoid a nasty side effect, see
6505 * characters() in SAX.c
6506 */
6507 ctxt->nodemem = 0;
6508 ctxt->nodelen = 0;
6509 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006510 }
6511 }
6512 } else {
6513 val = ent->content;
6514 if (val == NULL) return;
6515 /*
6516 * inline the entity.
6517 */
6518 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6519 (!ctxt->disableSAX))
6520 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6521 }
6522 }
6523}
6524
6525/**
6526 * xmlParseEntityRef:
6527 * @ctxt: an XML parser context
6528 *
6529 * parse ENTITY references declarations
6530 *
6531 * [68] EntityRef ::= '&' Name ';'
6532 *
6533 * [ WFC: Entity Declared ]
6534 * In a document without any DTD, a document with only an internal DTD
6535 * subset which contains no parameter entity references, or a document
6536 * with "standalone='yes'", the Name given in the entity reference
6537 * must match that in an entity declaration, except that well-formed
6538 * documents need not declare any of the following entities: amp, lt,
6539 * gt, apos, quot. The declaration of a parameter entity must precede
6540 * any reference to it. Similarly, the declaration of a general entity
6541 * must precede any reference to it which appears in a default value in an
6542 * attribute-list declaration. Note that if entities are declared in the
6543 * external subset or in external parameter entities, a non-validating
6544 * processor is not obligated to read and process their declarations;
6545 * for such documents, the rule that an entity must be declared is a
6546 * well-formedness constraint only if standalone='yes'.
6547 *
6548 * [ WFC: Parsed Entity ]
6549 * An entity reference must not contain the name of an unparsed entity
6550 *
6551 * Returns the xmlEntityPtr if found, or NULL otherwise.
6552 */
6553xmlEntityPtr
6554xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006555 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006556 xmlEntityPtr ent = NULL;
6557
6558 GROW;
6559
6560 if (RAW == '&') {
6561 NEXT;
6562 name = xmlParseName(ctxt);
6563 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6565 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006566 } else {
6567 if (RAW == ';') {
6568 NEXT;
6569 /*
6570 * Ask first SAX for entity resolution, otherwise try the
6571 * predefined set.
6572 */
6573 if (ctxt->sax != NULL) {
6574 if (ctxt->sax->getEntity != NULL)
6575 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006576 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006577 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006578 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6579 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006580 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006581 }
Owen Taylor3473f882001-02-23 17:55:21 +00006582 }
6583 /*
6584 * [ WFC: Entity Declared ]
6585 * In a document without any DTD, a document with only an
6586 * internal DTD subset which contains no parameter entity
6587 * references, or a document with "standalone='yes'", the
6588 * Name given in the entity reference must match that in an
6589 * entity declaration, except that well-formed documents
6590 * need not declare any of the following entities: amp, lt,
6591 * gt, apos, quot.
6592 * The declaration of a parameter entity must precede any
6593 * reference to it.
6594 * Similarly, the declaration of a general entity must
6595 * precede any reference to it which appears in a default
6596 * value in an attribute-list declaration. Note that if
6597 * entities are declared in the external subset or in
6598 * external parameter entities, a non-validating processor
6599 * is not obligated to read and process their declarations;
6600 * for such documents, the rule that an entity must be
6601 * declared is a well-formedness constraint only if
6602 * standalone='yes'.
6603 */
6604 if (ent == NULL) {
6605 if ((ctxt->standalone == 1) ||
6606 ((ctxt->hasExternalSubset == 0) &&
6607 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006608 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006609 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006610 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006611 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006612 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006613 if ((ctxt->inSubset == 0) &&
6614 (ctxt->sax != NULL) &&
6615 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006616 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006617 }
Owen Taylor3473f882001-02-23 17:55:21 +00006618 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006619 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006620 }
6621
6622 /*
6623 * [ WFC: Parsed Entity ]
6624 * An entity reference must not contain the name of an
6625 * unparsed entity
6626 */
6627 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006628 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006629 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006630 }
6631
6632 /*
6633 * [ WFC: No External Entity References ]
6634 * Attribute values cannot contain direct or indirect
6635 * entity references to external entities.
6636 */
6637 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6638 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006639 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6640 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006641 }
6642 /*
6643 * [ WFC: No < in Attribute Values ]
6644 * The replacement text of any entity referred to directly or
6645 * indirectly in an attribute value (other than "&lt;") must
6646 * not contain a <.
6647 */
6648 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6649 (ent != NULL) &&
6650 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6651 (ent->content != NULL) &&
6652 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006653 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006654 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006655 }
6656
6657 /*
6658 * Internal check, no parameter entities here ...
6659 */
6660 else {
6661 switch (ent->etype) {
6662 case XML_INTERNAL_PARAMETER_ENTITY:
6663 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006664 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6665 "Attempt to reference the parameter entity '%s'\n",
6666 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006667 break;
6668 default:
6669 break;
6670 }
6671 }
6672
6673 /*
6674 * [ WFC: No Recursion ]
6675 * A parsed entity must not contain a recursive reference
6676 * to itself, either directly or indirectly.
6677 * Done somewhere else
6678 */
6679
6680 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006681 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006682 }
Owen Taylor3473f882001-02-23 17:55:21 +00006683 }
6684 }
6685 return(ent);
6686}
6687
6688/**
6689 * xmlParseStringEntityRef:
6690 * @ctxt: an XML parser context
6691 * @str: a pointer to an index in the string
6692 *
6693 * parse ENTITY references declarations, but this version parses it from
6694 * a string value.
6695 *
6696 * [68] EntityRef ::= '&' Name ';'
6697 *
6698 * [ WFC: Entity Declared ]
6699 * In a document without any DTD, a document with only an internal DTD
6700 * subset which contains no parameter entity references, or a document
6701 * with "standalone='yes'", the Name given in the entity reference
6702 * must match that in an entity declaration, except that well-formed
6703 * documents need not declare any of the following entities: amp, lt,
6704 * gt, apos, quot. The declaration of a parameter entity must precede
6705 * any reference to it. Similarly, the declaration of a general entity
6706 * must precede any reference to it which appears in a default value in an
6707 * attribute-list declaration. Note that if entities are declared in the
6708 * external subset or in external parameter entities, a non-validating
6709 * processor is not obligated to read and process their declarations;
6710 * for such documents, the rule that an entity must be declared is a
6711 * well-formedness constraint only if standalone='yes'.
6712 *
6713 * [ WFC: Parsed Entity ]
6714 * An entity reference must not contain the name of an unparsed entity
6715 *
6716 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6717 * is updated to the current location in the string.
6718 */
6719xmlEntityPtr
6720xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6721 xmlChar *name;
6722 const xmlChar *ptr;
6723 xmlChar cur;
6724 xmlEntityPtr ent = NULL;
6725
6726 if ((str == NULL) || (*str == NULL))
6727 return(NULL);
6728 ptr = *str;
6729 cur = *ptr;
6730 if (cur == '&') {
6731 ptr++;
6732 cur = *ptr;
6733 name = xmlParseStringName(ctxt, &ptr);
6734 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6736 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006737 } else {
6738 if (*ptr == ';') {
6739 ptr++;
6740 /*
6741 * Ask first SAX for entity resolution, otherwise try the
6742 * predefined set.
6743 */
6744 if (ctxt->sax != NULL) {
6745 if (ctxt->sax->getEntity != NULL)
6746 ent = ctxt->sax->getEntity(ctxt->userData, name);
6747 if (ent == NULL)
6748 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006749 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006750 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006751 }
Owen Taylor3473f882001-02-23 17:55:21 +00006752 }
6753 /*
6754 * [ WFC: Entity Declared ]
6755 * In a document without any DTD, a document with only an
6756 * internal DTD subset which contains no parameter entity
6757 * references, or a document with "standalone='yes'", the
6758 * Name given in the entity reference must match that in an
6759 * entity declaration, except that well-formed documents
6760 * need not declare any of the following entities: amp, lt,
6761 * gt, apos, quot.
6762 * The declaration of a parameter entity must precede any
6763 * reference to it.
6764 * Similarly, the declaration of a general entity must
6765 * precede any reference to it which appears in a default
6766 * value in an attribute-list declaration. Note that if
6767 * entities are declared in the external subset or in
6768 * external parameter entities, a non-validating processor
6769 * is not obligated to read and process their declarations;
6770 * for such documents, the rule that an entity must be
6771 * declared is a well-formedness constraint only if
6772 * standalone='yes'.
6773 */
6774 if (ent == NULL) {
6775 if ((ctxt->standalone == 1) ||
6776 ((ctxt->hasExternalSubset == 0) &&
6777 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006778 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006779 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006780 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006781 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006782 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006783 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006784 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006785 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006786 }
6787
6788 /*
6789 * [ WFC: Parsed Entity ]
6790 * An entity reference must not contain the name of an
6791 * unparsed entity
6792 */
6793 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006794 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006795 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006796 }
6797
6798 /*
6799 * [ WFC: No External Entity References ]
6800 * Attribute values cannot contain direct or indirect
6801 * entity references to external entities.
6802 */
6803 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6804 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006805 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006806 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006807 }
6808 /*
6809 * [ WFC: No < in Attribute Values ]
6810 * The replacement text of any entity referred to directly or
6811 * indirectly in an attribute value (other than "&lt;") must
6812 * not contain a <.
6813 */
6814 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6815 (ent != NULL) &&
6816 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6817 (ent->content != NULL) &&
6818 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006819 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6820 "'<' in entity '%s' is not allowed in attributes values\n",
6821 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006822 }
6823
6824 /*
6825 * Internal check, no parameter entities here ...
6826 */
6827 else {
6828 switch (ent->etype) {
6829 case XML_INTERNAL_PARAMETER_ENTITY:
6830 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006831 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6832 "Attempt to reference the parameter entity '%s'\n",
6833 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006834 break;
6835 default:
6836 break;
6837 }
6838 }
6839
6840 /*
6841 * [ WFC: No Recursion ]
6842 * A parsed entity must not contain a recursive reference
6843 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006844 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006845 */
6846
6847 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006848 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006849 }
6850 xmlFree(name);
6851 }
6852 }
6853 *str = ptr;
6854 return(ent);
6855}
6856
6857/**
6858 * xmlParsePEReference:
6859 * @ctxt: an XML parser context
6860 *
6861 * parse PEReference declarations
6862 * The entity content is handled directly by pushing it's content as
6863 * a new input stream.
6864 *
6865 * [69] PEReference ::= '%' Name ';'
6866 *
6867 * [ WFC: No Recursion ]
6868 * A parsed entity must not contain a recursive
6869 * reference to itself, either directly or indirectly.
6870 *
6871 * [ WFC: Entity Declared ]
6872 * In a document without any DTD, a document with only an internal DTD
6873 * subset which contains no parameter entity references, or a document
6874 * with "standalone='yes'", ... ... The declaration of a parameter
6875 * entity must precede any reference to it...
6876 *
6877 * [ VC: Entity Declared ]
6878 * In a document with an external subset or external parameter entities
6879 * with "standalone='no'", ... ... The declaration of a parameter entity
6880 * must precede any reference to it...
6881 *
6882 * [ WFC: In DTD ]
6883 * Parameter-entity references may only appear in the DTD.
6884 * NOTE: misleading but this is handled.
6885 */
6886void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006887xmlParsePEReference(xmlParserCtxtPtr ctxt)
6888{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006889 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006890 xmlEntityPtr entity = NULL;
6891 xmlParserInputPtr input;
6892
6893 if (RAW == '%') {
6894 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006895 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006896 if (name == NULL) {
6897 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6898 "xmlParsePEReference: no name\n");
6899 } else {
6900 if (RAW == ';') {
6901 NEXT;
6902 if ((ctxt->sax != NULL) &&
6903 (ctxt->sax->getParameterEntity != NULL))
6904 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6905 name);
6906 if (entity == NULL) {
6907 /*
6908 * [ WFC: Entity Declared ]
6909 * In a document without any DTD, a document with only an
6910 * internal DTD subset which contains no parameter entity
6911 * references, or a document with "standalone='yes'", ...
6912 * ... The declaration of a parameter entity must precede
6913 * any reference to it...
6914 */
6915 if ((ctxt->standalone == 1) ||
6916 ((ctxt->hasExternalSubset == 0) &&
6917 (ctxt->hasPErefs == 0))) {
6918 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6919 "PEReference: %%%s; not found\n",
6920 name);
6921 } else {
6922 /*
6923 * [ VC: Entity Declared ]
6924 * In a document with an external subset or external
6925 * parameter entities with "standalone='no'", ...
6926 * ... The declaration of a parameter entity must
6927 * precede any reference to it...
6928 */
6929 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6930 "PEReference: %%%s; not found\n",
6931 name, NULL);
6932 ctxt->valid = 0;
6933 }
6934 } else {
6935 /*
6936 * Internal checking in case the entity quest barfed
6937 */
6938 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6939 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6940 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6941 "Internal: %%%s; is not a parameter entity\n",
6942 name, NULL);
6943 } else if (ctxt->input->free != deallocblankswrapper) {
6944 input =
6945 xmlNewBlanksWrapperInputStream(ctxt, entity);
6946 xmlPushInput(ctxt, input);
6947 } else {
6948 /*
6949 * TODO !!!
6950 * handle the extra spaces added before and after
6951 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6952 */
6953 input = xmlNewEntityInputStream(ctxt, entity);
6954 xmlPushInput(ctxt, input);
6955 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006956 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006957 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006958 xmlParseTextDecl(ctxt);
6959 if (ctxt->errNo ==
6960 XML_ERR_UNSUPPORTED_ENCODING) {
6961 /*
6962 * The XML REC instructs us to stop parsing
6963 * right here
6964 */
6965 ctxt->instate = XML_PARSER_EOF;
6966 return;
6967 }
6968 }
6969 }
6970 }
6971 ctxt->hasPErefs = 1;
6972 } else {
6973 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6974 }
6975 }
Owen Taylor3473f882001-02-23 17:55:21 +00006976 }
6977}
6978
6979/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00006980 * xmlLoadEntityContent:
6981 * @ctxt: an XML parser context
6982 * @entity: an unloaded system entity
6983 *
6984 * Load the original content of the given system entity from the
6985 * ExternalID/SystemID given. This is to be used for Included in Literal
6986 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
6987 *
6988 * Returns 0 in case of success and -1 in case of failure
6989 */
6990static int
6991xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
6992 xmlParserInputPtr input;
6993 xmlBufferPtr buf;
6994 int l, c;
6995 int count = 0;
6996
6997 if ((ctxt == NULL) || (entity == NULL) ||
6998 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
6999 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7000 (entity->content != NULL)) {
7001 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7002 "xmlLoadEntityContent parameter error");
7003 return(-1);
7004 }
7005
7006 if (xmlParserDebugEntities)
7007 xmlGenericError(xmlGenericErrorContext,
7008 "Reading %s entity content input\n", entity->name);
7009
7010 buf = xmlBufferCreate();
7011 if (buf == NULL) {
7012 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7013 "xmlLoadEntityContent parameter error");
7014 return(-1);
7015 }
7016
7017 input = xmlNewEntityInputStream(ctxt, entity);
7018 if (input == NULL) {
7019 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7020 "xmlLoadEntityContent input error");
7021 xmlBufferFree(buf);
7022 return(-1);
7023 }
7024
7025 /*
7026 * Push the entity as the current input, read char by char
7027 * saving to the buffer until the end of the entity or an error
7028 */
7029 xmlPushInput(ctxt, input);
7030 GROW;
7031 c = CUR_CHAR(l);
7032 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7033 (IS_CHAR(c))) {
7034 xmlBufferAdd(buf, ctxt->input->cur, l);
7035 if (count++ > 100) {
7036 count = 0;
7037 GROW;
7038 }
7039 NEXTL(l);
7040 c = CUR_CHAR(l);
7041 }
7042
7043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7044 xmlPopInput(ctxt);
7045 } else if (!IS_CHAR(c)) {
7046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7047 "xmlLoadEntityContent: invalid char value %d\n",
7048 c);
7049 xmlBufferFree(buf);
7050 return(-1);
7051 }
7052 entity->content = buf->content;
7053 buf->content = NULL;
7054 xmlBufferFree(buf);
7055
7056 return(0);
7057}
7058
7059/**
Owen Taylor3473f882001-02-23 17:55:21 +00007060 * xmlParseStringPEReference:
7061 * @ctxt: an XML parser context
7062 * @str: a pointer to an index in the string
7063 *
7064 * parse PEReference declarations
7065 *
7066 * [69] PEReference ::= '%' Name ';'
7067 *
7068 * [ WFC: No Recursion ]
7069 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007070 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007071 *
7072 * [ WFC: Entity Declared ]
7073 * In a document without any DTD, a document with only an internal DTD
7074 * subset which contains no parameter entity references, or a document
7075 * with "standalone='yes'", ... ... The declaration of a parameter
7076 * entity must precede any reference to it...
7077 *
7078 * [ VC: Entity Declared ]
7079 * In a document with an external subset or external parameter entities
7080 * with "standalone='no'", ... ... The declaration of a parameter entity
7081 * must precede any reference to it...
7082 *
7083 * [ WFC: In DTD ]
7084 * Parameter-entity references may only appear in the DTD.
7085 * NOTE: misleading but this is handled.
7086 *
7087 * Returns the string of the entity content.
7088 * str is updated to the current value of the index
7089 */
7090xmlEntityPtr
7091xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7092 const xmlChar *ptr;
7093 xmlChar cur;
7094 xmlChar *name;
7095 xmlEntityPtr entity = NULL;
7096
7097 if ((str == NULL) || (*str == NULL)) return(NULL);
7098 ptr = *str;
7099 cur = *ptr;
7100 if (cur == '%') {
7101 ptr++;
7102 cur = *ptr;
7103 name = xmlParseStringName(ctxt, &ptr);
7104 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7106 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007107 } else {
7108 cur = *ptr;
7109 if (cur == ';') {
7110 ptr++;
7111 cur = *ptr;
7112 if ((ctxt->sax != NULL) &&
7113 (ctxt->sax->getParameterEntity != NULL))
7114 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7115 name);
7116 if (entity == NULL) {
7117 /*
7118 * [ WFC: Entity Declared ]
7119 * In a document without any DTD, a document with only an
7120 * internal DTD subset which contains no parameter entity
7121 * references, or a document with "standalone='yes'", ...
7122 * ... The declaration of a parameter entity must precede
7123 * any reference to it...
7124 */
7125 if ((ctxt->standalone == 1) ||
7126 ((ctxt->hasExternalSubset == 0) &&
7127 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007128 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007129 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007130 } else {
7131 /*
7132 * [ VC: Entity Declared ]
7133 * In a document with an external subset or external
7134 * parameter entities with "standalone='no'", ...
7135 * ... The declaration of a parameter entity must
7136 * precede any reference to it...
7137 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007138 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7139 "PEReference: %%%s; not found\n",
7140 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007141 ctxt->valid = 0;
7142 }
7143 } else {
7144 /*
7145 * Internal checking in case the entity quest barfed
7146 */
7147 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7148 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007149 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7150 "%%%s; is not a parameter entity\n",
7151 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007152 }
7153 }
7154 ctxt->hasPErefs = 1;
7155 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007156 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007157 }
7158 xmlFree(name);
7159 }
7160 }
7161 *str = ptr;
7162 return(entity);
7163}
7164
7165/**
7166 * xmlParseDocTypeDecl:
7167 * @ctxt: an XML parser context
7168 *
7169 * parse a DOCTYPE declaration
7170 *
7171 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7172 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7173 *
7174 * [ VC: Root Element Type ]
7175 * The Name in the document type declaration must match the element
7176 * type of the root element.
7177 */
7178
7179void
7180xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007181 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007182 xmlChar *ExternalID = NULL;
7183 xmlChar *URI = NULL;
7184
7185 /*
7186 * We know that '<!DOCTYPE' has been detected.
7187 */
7188 SKIP(9);
7189
7190 SKIP_BLANKS;
7191
7192 /*
7193 * Parse the DOCTYPE name.
7194 */
7195 name = xmlParseName(ctxt);
7196 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007197 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7198 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007199 }
7200 ctxt->intSubName = name;
7201
7202 SKIP_BLANKS;
7203
7204 /*
7205 * Check for SystemID and ExternalID
7206 */
7207 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7208
7209 if ((URI != NULL) || (ExternalID != NULL)) {
7210 ctxt->hasExternalSubset = 1;
7211 }
7212 ctxt->extSubURI = URI;
7213 ctxt->extSubSystem = ExternalID;
7214
7215 SKIP_BLANKS;
7216
7217 /*
7218 * Create and update the internal subset.
7219 */
7220 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7221 (!ctxt->disableSAX))
7222 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7223
7224 /*
7225 * Is there any internal subset declarations ?
7226 * they are handled separately in xmlParseInternalSubset()
7227 */
7228 if (RAW == '[')
7229 return;
7230
7231 /*
7232 * We should be at the end of the DOCTYPE declaration.
7233 */
7234 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007235 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007236 }
7237 NEXT;
7238}
7239
7240/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007241 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007242 * @ctxt: an XML parser context
7243 *
7244 * parse the internal subset declaration
7245 *
7246 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7247 */
7248
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007249static void
Owen Taylor3473f882001-02-23 17:55:21 +00007250xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7251 /*
7252 * Is there any DTD definition ?
7253 */
7254 if (RAW == '[') {
7255 ctxt->instate = XML_PARSER_DTD;
7256 NEXT;
7257 /*
7258 * Parse the succession of Markup declarations and
7259 * PEReferences.
7260 * Subsequence (markupdecl | PEReference | S)*
7261 */
7262 while (RAW != ']') {
7263 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007264 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007265
7266 SKIP_BLANKS;
7267 xmlParseMarkupDecl(ctxt);
7268 xmlParsePEReference(ctxt);
7269
7270 /*
7271 * Pop-up of finished entities.
7272 */
7273 while ((RAW == 0) && (ctxt->inputNr > 1))
7274 xmlPopInput(ctxt);
7275
7276 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007277 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007278 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007279 break;
7280 }
7281 }
7282 if (RAW == ']') {
7283 NEXT;
7284 SKIP_BLANKS;
7285 }
7286 }
7287
7288 /*
7289 * We should be at the end of the DOCTYPE declaration.
7290 */
7291 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007292 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007293 }
7294 NEXT;
7295}
7296
Daniel Veillard81273902003-09-30 00:43:48 +00007297#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007298/**
7299 * xmlParseAttribute:
7300 * @ctxt: an XML parser context
7301 * @value: a xmlChar ** used to store the value of the attribute
7302 *
7303 * parse an attribute
7304 *
7305 * [41] Attribute ::= Name Eq AttValue
7306 *
7307 * [ WFC: No External Entity References ]
7308 * Attribute values cannot contain direct or indirect entity references
7309 * to external entities.
7310 *
7311 * [ WFC: No < in Attribute Values ]
7312 * The replacement text of any entity referred to directly or indirectly in
7313 * an attribute value (other than "&lt;") must not contain a <.
7314 *
7315 * [ VC: Attribute Value Type ]
7316 * The attribute must have been declared; the value must be of the type
7317 * declared for it.
7318 *
7319 * [25] Eq ::= S? '=' S?
7320 *
7321 * With namespace:
7322 *
7323 * [NS 11] Attribute ::= QName Eq AttValue
7324 *
7325 * Also the case QName == xmlns:??? is handled independently as a namespace
7326 * definition.
7327 *
7328 * Returns the attribute name, and the value in *value.
7329 */
7330
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007331const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007332xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007333 const xmlChar *name;
7334 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007335
7336 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007337 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007338 name = xmlParseName(ctxt);
7339 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007340 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007341 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007342 return(NULL);
7343 }
7344
7345 /*
7346 * read the value
7347 */
7348 SKIP_BLANKS;
7349 if (RAW == '=') {
7350 NEXT;
7351 SKIP_BLANKS;
7352 val = xmlParseAttValue(ctxt);
7353 ctxt->instate = XML_PARSER_CONTENT;
7354 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007355 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007356 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007357 return(NULL);
7358 }
7359
7360 /*
7361 * Check that xml:lang conforms to the specification
7362 * No more registered as an error, just generate a warning now
7363 * since this was deprecated in XML second edition
7364 */
7365 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7366 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007367 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7368 "Malformed value for xml:lang : %s\n",
7369 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007370 }
7371 }
7372
7373 /*
7374 * Check that xml:space conforms to the specification
7375 */
7376 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7377 if (xmlStrEqual(val, BAD_CAST "default"))
7378 *(ctxt->space) = 0;
7379 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7380 *(ctxt->space) = 1;
7381 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007382 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007383"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007384 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007385 }
7386 }
7387
7388 *value = val;
7389 return(name);
7390}
7391
7392/**
7393 * xmlParseStartTag:
7394 * @ctxt: an XML parser context
7395 *
7396 * parse a start of tag either for rule element or
7397 * EmptyElement. In both case we don't parse the tag closing chars.
7398 *
7399 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7400 *
7401 * [ WFC: Unique Att Spec ]
7402 * No attribute name may appear more than once in the same start-tag or
7403 * empty-element tag.
7404 *
7405 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7406 *
7407 * [ WFC: Unique Att Spec ]
7408 * No attribute name may appear more than once in the same start-tag or
7409 * empty-element tag.
7410 *
7411 * With namespace:
7412 *
7413 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7414 *
7415 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7416 *
7417 * Returns the element name parsed
7418 */
7419
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007420const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007421xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007422 const xmlChar *name;
7423 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007424 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007425 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007426 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007427 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007428 int i;
7429
7430 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007431 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007432
7433 name = xmlParseName(ctxt);
7434 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007435 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007436 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007437 return(NULL);
7438 }
7439
7440 /*
7441 * Now parse the attributes, it ends up with the ending
7442 *
7443 * (S Attribute)* S?
7444 */
7445 SKIP_BLANKS;
7446 GROW;
7447
Daniel Veillard21a0f912001-02-25 19:54:14 +00007448 while ((RAW != '>') &&
7449 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007450 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007451 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007452 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007453
7454 attname = xmlParseAttribute(ctxt, &attvalue);
7455 if ((attname != NULL) && (attvalue != NULL)) {
7456 /*
7457 * [ WFC: Unique Att Spec ]
7458 * No attribute name may appear more than once in the same
7459 * start-tag or empty-element tag.
7460 */
7461 for (i = 0; i < nbatts;i += 2) {
7462 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007463 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007464 xmlFree(attvalue);
7465 goto failed;
7466 }
7467 }
Owen Taylor3473f882001-02-23 17:55:21 +00007468 /*
7469 * Add the pair to atts
7470 */
7471 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007472 maxatts = 22; /* allow for 10 attrs by default */
7473 atts = (const xmlChar **)
7474 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007475 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007476 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007477 if (attvalue != NULL)
7478 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007479 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007480 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007481 ctxt->atts = atts;
7482 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007483 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007484 const xmlChar **n;
7485
Owen Taylor3473f882001-02-23 17:55:21 +00007486 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007487 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007488 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007489 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007490 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007491 if (attvalue != NULL)
7492 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007493 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007494 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007495 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007496 ctxt->atts = atts;
7497 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007498 }
7499 atts[nbatts++] = attname;
7500 atts[nbatts++] = attvalue;
7501 atts[nbatts] = NULL;
7502 atts[nbatts + 1] = NULL;
7503 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007504 if (attvalue != NULL)
7505 xmlFree(attvalue);
7506 }
7507
7508failed:
7509
Daniel Veillard3772de32002-12-17 10:31:45 +00007510 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007511 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7512 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007513 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7515 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007516 }
7517 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007518 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7519 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007520 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7521 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007522 break;
7523 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007524 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007525 GROW;
7526 }
7527
7528 /*
7529 * SAX: Start of Element !
7530 */
7531 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007532 (!ctxt->disableSAX)) {
7533 if (nbatts > 0)
7534 ctxt->sax->startElement(ctxt->userData, name, atts);
7535 else
7536 ctxt->sax->startElement(ctxt->userData, name, NULL);
7537 }
Owen Taylor3473f882001-02-23 17:55:21 +00007538
7539 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007540 /* Free only the content strings */
7541 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007542 if (atts[i] != NULL)
7543 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007544 }
7545 return(name);
7546}
7547
7548/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007549 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007550 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007551 * @line: line of the start tag
7552 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007553 *
7554 * parse an end of tag
7555 *
7556 * [42] ETag ::= '</' Name S? '>'
7557 *
7558 * With namespace
7559 *
7560 * [NS 9] ETag ::= '</' QName S? '>'
7561 */
7562
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007563static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007564xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007565 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007566
7567 GROW;
7568 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007569 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007570 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007571 return;
7572 }
7573 SKIP(2);
7574
Daniel Veillard46de64e2002-05-29 08:21:33 +00007575 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007576
7577 /*
7578 * We should definitely be at the ending "S? '>'" part
7579 */
7580 GROW;
7581 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007582 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007583 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007584 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007585 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007586
7587 /*
7588 * [ WFC: Element Type Match ]
7589 * The Name in an element's end-tag must match the element type in the
7590 * start-tag.
7591 *
7592 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007593 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007594 if (name == NULL) name = BAD_CAST "unparseable";
7595 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007596 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007597 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007598 }
7599
7600 /*
7601 * SAX: End of Tag
7602 */
7603 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7604 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007605 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007606
Daniel Veillarde57ec792003-09-10 10:50:59 +00007607 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007608 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007609 return;
7610}
7611
7612/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007613 * xmlParseEndTag:
7614 * @ctxt: an XML parser context
7615 *
7616 * parse an end of tag
7617 *
7618 * [42] ETag ::= '</' Name S? '>'
7619 *
7620 * With namespace
7621 *
7622 * [NS 9] ETag ::= '</' QName S? '>'
7623 */
7624
7625void
7626xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 xmlParseEndTag1(ctxt, 0);
7628}
Daniel Veillard81273902003-09-30 00:43:48 +00007629#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007630
7631/************************************************************************
7632 * *
7633 * SAX 2 specific operations *
7634 * *
7635 ************************************************************************/
7636
7637static const xmlChar *
7638xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7639 int len = 0, l;
7640 int c;
7641 int count = 0;
7642
7643 /*
7644 * Handler for more complex cases
7645 */
7646 GROW;
7647 c = CUR_CHAR(l);
7648 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007649 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007650 return(NULL);
7651 }
7652
7653 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007654 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007655 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007656 (IS_COMBINING(c)) ||
7657 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007658 if (count++ > 100) {
7659 count = 0;
7660 GROW;
7661 }
7662 len += l;
7663 NEXTL(l);
7664 c = CUR_CHAR(l);
7665 }
7666 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7667}
7668
7669/*
7670 * xmlGetNamespace:
7671 * @ctxt: an XML parser context
7672 * @prefix: the prefix to lookup
7673 *
7674 * Lookup the namespace name for the @prefix (which ca be NULL)
7675 * The prefix must come from the @ctxt->dict dictionnary
7676 *
7677 * Returns the namespace name or NULL if not bound
7678 */
7679static const xmlChar *
7680xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7681 int i;
7682
Daniel Veillarde57ec792003-09-10 10:50:59 +00007683 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007684 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007685 if (ctxt->nsTab[i] == prefix) {
7686 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7687 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007689 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007690 return(NULL);
7691}
7692
7693/**
7694 * xmlParseNCName:
7695 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007696 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007697 *
7698 * parse an XML name.
7699 *
7700 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7701 * CombiningChar | Extender
7702 *
7703 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7704 *
7705 * Returns the Name parsed or NULL
7706 */
7707
7708static const xmlChar *
7709xmlParseNCName(xmlParserCtxtPtr ctxt) {
7710 const xmlChar *in;
7711 const xmlChar *ret;
7712 int count = 0;
7713
7714 /*
7715 * Accelerator for simple ASCII names
7716 */
7717 in = ctxt->input->cur;
7718 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7719 ((*in >= 0x41) && (*in <= 0x5A)) ||
7720 (*in == '_')) {
7721 in++;
7722 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7723 ((*in >= 0x41) && (*in <= 0x5A)) ||
7724 ((*in >= 0x30) && (*in <= 0x39)) ||
7725 (*in == '_') || (*in == '-') ||
7726 (*in == '.'))
7727 in++;
7728 if ((*in > 0) && (*in < 0x80)) {
7729 count = in - ctxt->input->cur;
7730 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7731 ctxt->input->cur = in;
7732 ctxt->nbChars += count;
7733 ctxt->input->col += count;
7734 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007735 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 }
7737 return(ret);
7738 }
7739 }
7740 return(xmlParseNCNameComplex(ctxt));
7741}
7742
7743/**
7744 * xmlParseQName:
7745 * @ctxt: an XML parser context
7746 * @prefix: pointer to store the prefix part
7747 *
7748 * parse an XML Namespace QName
7749 *
7750 * [6] QName ::= (Prefix ':')? LocalPart
7751 * [7] Prefix ::= NCName
7752 * [8] LocalPart ::= NCName
7753 *
7754 * Returns the Name parsed or NULL
7755 */
7756
7757static const xmlChar *
7758xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7759 const xmlChar *l, *p;
7760
7761 GROW;
7762
7763 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007764 if (l == NULL) {
7765 if (CUR == ':') {
7766 l = xmlParseName(ctxt);
7767 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007768 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7769 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007770 *prefix = NULL;
7771 return(l);
7772 }
7773 }
7774 return(NULL);
7775 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007776 if (CUR == ':') {
7777 NEXT;
7778 p = l;
7779 l = xmlParseNCName(ctxt);
7780 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007781 xmlChar *tmp;
7782
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007783 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7784 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007785 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7786 p = xmlDictLookup(ctxt->dict, tmp, -1);
7787 if (tmp != NULL) xmlFree(tmp);
7788 *prefix = NULL;
7789 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007790 }
7791 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007792 xmlChar *tmp;
7793
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007794 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7795 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007796 NEXT;
7797 tmp = (xmlChar *) xmlParseName(ctxt);
7798 if (tmp != NULL) {
7799 tmp = xmlBuildQName(tmp, l, NULL, 0);
7800 l = xmlDictLookup(ctxt->dict, tmp, -1);
7801 if (tmp != NULL) xmlFree(tmp);
7802 *prefix = p;
7803 return(l);
7804 }
7805 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7806 l = xmlDictLookup(ctxt->dict, tmp, -1);
7807 if (tmp != NULL) xmlFree(tmp);
7808 *prefix = p;
7809 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007810 }
7811 *prefix = p;
7812 } else
7813 *prefix = NULL;
7814 return(l);
7815}
7816
7817/**
7818 * xmlParseQNameAndCompare:
7819 * @ctxt: an XML parser context
7820 * @name: the localname
7821 * @prefix: the prefix, if any.
7822 *
7823 * parse an XML name and compares for match
7824 * (specialized for endtag parsing)
7825 *
7826 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7827 * and the name for mismatch
7828 */
7829
7830static const xmlChar *
7831xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7832 xmlChar const *prefix) {
7833 const xmlChar *cmp = name;
7834 const xmlChar *in;
7835 const xmlChar *ret;
7836 const xmlChar *prefix2;
7837
7838 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7839
7840 GROW;
7841 in = ctxt->input->cur;
7842
7843 cmp = prefix;
7844 while (*in != 0 && *in == *cmp) {
7845 ++in;
7846 ++cmp;
7847 }
7848 if ((*cmp == 0) && (*in == ':')) {
7849 in++;
7850 cmp = name;
7851 while (*in != 0 && *in == *cmp) {
7852 ++in;
7853 ++cmp;
7854 }
William M. Brack76e95df2003-10-18 16:20:14 +00007855 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007856 /* success */
7857 ctxt->input->cur = in;
7858 return((const xmlChar*) 1);
7859 }
7860 }
7861 /*
7862 * all strings coms from the dictionary, equality can be done directly
7863 */
7864 ret = xmlParseQName (ctxt, &prefix2);
7865 if ((ret == name) && (prefix == prefix2))
7866 return((const xmlChar*) 1);
7867 return ret;
7868}
7869
7870/**
7871 * xmlParseAttValueInternal:
7872 * @ctxt: an XML parser context
7873 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007874 * @alloc: whether the attribute was reallocated as a new string
7875 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007876 *
7877 * parse a value for an attribute.
7878 * NOTE: if no normalization is needed, the routine will return pointers
7879 * directly from the data buffer.
7880 *
7881 * 3.3.3 Attribute-Value Normalization:
7882 * Before the value of an attribute is passed to the application or
7883 * checked for validity, the XML processor must normalize it as follows:
7884 * - a character reference is processed by appending the referenced
7885 * character to the attribute value
7886 * - an entity reference is processed by recursively processing the
7887 * replacement text of the entity
7888 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7889 * appending #x20 to the normalized value, except that only a single
7890 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7891 * parsed entity or the literal entity value of an internal parsed entity
7892 * - other characters are processed by appending them to the normalized value
7893 * If the declared value is not CDATA, then the XML processor must further
7894 * process the normalized attribute value by discarding any leading and
7895 * trailing space (#x20) characters, and by replacing sequences of space
7896 * (#x20) characters by a single space (#x20) character.
7897 * All attributes for which no declaration has been read should be treated
7898 * by a non-validating parser as if declared CDATA.
7899 *
7900 * Returns the AttValue parsed or NULL. The value has to be freed by the
7901 * caller if it was copied, this can be detected by val[*len] == 0.
7902 */
7903
7904static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007905xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7906 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007907{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007909 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007910 xmlChar *ret = NULL;
7911
7912 GROW;
7913 in = (xmlChar *) CUR_PTR;
7914 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007915 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007916 return (NULL);
7917 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007918 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007919
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007920 /*
7921 * try to handle in this routine the most common case where no
7922 * allocation of a new string is required and where content is
7923 * pure ASCII.
7924 */
7925 limit = *in++;
7926 end = ctxt->input->end;
7927 start = in;
7928 if (in >= end) {
7929 const xmlChar *oldbase = ctxt->input->base;
7930 GROW;
7931 if (oldbase != ctxt->input->base) {
7932 long delta = ctxt->input->base - oldbase;
7933 start = start + delta;
7934 in = in + delta;
7935 }
7936 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007937 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007938 if (normalize) {
7939 /*
7940 * Skip any leading spaces
7941 */
7942 while ((in < end) && (*in != limit) &&
7943 ((*in == 0x20) || (*in == 0x9) ||
7944 (*in == 0xA) || (*in == 0xD))) {
7945 in++;
7946 start = in;
7947 if (in >= end) {
7948 const xmlChar *oldbase = ctxt->input->base;
7949 GROW;
7950 if (oldbase != ctxt->input->base) {
7951 long delta = ctxt->input->base - oldbase;
7952 start = start + delta;
7953 in = in + delta;
7954 }
7955 end = ctxt->input->end;
7956 }
7957 }
7958 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7959 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7960 if ((*in++ == 0x20) && (*in == 0x20)) break;
7961 if (in >= end) {
7962 const xmlChar *oldbase = ctxt->input->base;
7963 GROW;
7964 if (oldbase != ctxt->input->base) {
7965 long delta = ctxt->input->base - oldbase;
7966 start = start + delta;
7967 in = in + delta;
7968 }
7969 end = ctxt->input->end;
7970 }
7971 }
7972 last = in;
7973 /*
7974 * skip the trailing blanks
7975 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007976 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007977 while ((in < end) && (*in != limit) &&
7978 ((*in == 0x20) || (*in == 0x9) ||
7979 (*in == 0xA) || (*in == 0xD))) {
7980 in++;
7981 if (in >= end) {
7982 const xmlChar *oldbase = ctxt->input->base;
7983 GROW;
7984 if (oldbase != ctxt->input->base) {
7985 long delta = ctxt->input->base - oldbase;
7986 start = start + delta;
7987 in = in + delta;
7988 last = last + delta;
7989 }
7990 end = ctxt->input->end;
7991 }
7992 }
7993 if (*in != limit) goto need_complex;
7994 } else {
7995 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7996 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7997 in++;
7998 if (in >= end) {
7999 const xmlChar *oldbase = ctxt->input->base;
8000 GROW;
8001 if (oldbase != ctxt->input->base) {
8002 long delta = ctxt->input->base - oldbase;
8003 start = start + delta;
8004 in = in + delta;
8005 }
8006 end = ctxt->input->end;
8007 }
8008 }
8009 last = in;
8010 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008011 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008012 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008013 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008014 *len = last - start;
8015 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008017 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008018 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008019 }
8020 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008021 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008022 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008023need_complex:
8024 if (alloc) *alloc = 1;
8025 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008026}
8027
8028/**
8029 * xmlParseAttribute2:
8030 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008031 * @pref: the element prefix
8032 * @elem: the element name
8033 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008035 * @len: an int * to save the length of the attribute
8036 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008037 *
8038 * parse an attribute in the new SAX2 framework.
8039 *
8040 * Returns the attribute name, and the value in *value, .
8041 */
8042
8043static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008044xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008045 const xmlChar * pref, const xmlChar * elem,
8046 const xmlChar ** prefix, xmlChar ** value,
8047 int *len, int *alloc)
8048{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008050 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008051 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008052
8053 *value = NULL;
8054 GROW;
8055 name = xmlParseQName(ctxt, prefix);
8056 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008057 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8058 "error parsing attribute name\n");
8059 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008060 }
8061
8062 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008063 * get the type if needed
8064 */
8065 if (ctxt->attsSpecial != NULL) {
8066 int type;
8067
8068 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008069 pref, elem, *prefix, name);
8070 if (type != 0)
8071 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008072 }
8073
8074 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008075 * read the value
8076 */
8077 SKIP_BLANKS;
8078 if (RAW == '=') {
8079 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008080 SKIP_BLANKS;
8081 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8082 if (normalize) {
8083 /*
8084 * Sometimes a second normalisation pass for spaces is needed
8085 * but that only happens if charrefs or entities refernces
8086 * have been used in the attribute value, i.e. the attribute
8087 * value have been extracted in an allocated string already.
8088 */
8089 if (*alloc) {
8090 const xmlChar *val2;
8091
8092 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8093 if (val2 != NULL) {
8094 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008095 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008096 }
8097 }
8098 }
8099 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008100 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008101 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8102 "Specification mandate value for attribute %s\n",
8103 name);
8104 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008105 }
8106
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008107 if (*prefix == ctxt->str_xml) {
8108 /*
8109 * Check that xml:lang conforms to the specification
8110 * No more registered as an error, just generate a warning now
8111 * since this was deprecated in XML second edition
8112 */
8113 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8114 internal_val = xmlStrndup(val, *len);
8115 if (!xmlCheckLanguageID(internal_val)) {
8116 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8117 "Malformed value for xml:lang : %s\n",
8118 internal_val, NULL);
8119 }
8120 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008121
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008122 /*
8123 * Check that xml:space conforms to the specification
8124 */
8125 if (xmlStrEqual(name, BAD_CAST "space")) {
8126 internal_val = xmlStrndup(val, *len);
8127 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8128 *(ctxt->space) = 0;
8129 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8130 *(ctxt->space) = 1;
8131 else {
8132 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8133 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8134 internal_val, NULL);
8135 }
8136 }
8137 if (internal_val) {
8138 xmlFree(internal_val);
8139 }
8140 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008141
8142 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008143 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008144}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008145/**
8146 * xmlParseStartTag2:
8147 * @ctxt: an XML parser context
8148 *
8149 * parse a start of tag either for rule element or
8150 * EmptyElement. In both case we don't parse the tag closing chars.
8151 * This routine is called when running SAX2 parsing
8152 *
8153 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8154 *
8155 * [ WFC: Unique Att Spec ]
8156 * No attribute name may appear more than once in the same start-tag or
8157 * empty-element tag.
8158 *
8159 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8160 *
8161 * [ WFC: Unique Att Spec ]
8162 * No attribute name may appear more than once in the same start-tag or
8163 * empty-element tag.
8164 *
8165 * With namespace:
8166 *
8167 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8168 *
8169 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8170 *
8171 * Returns the element name parsed
8172 */
8173
8174static const xmlChar *
8175xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008176 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008177 const xmlChar *localname;
8178 const xmlChar *prefix;
8179 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008180 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181 const xmlChar *nsname;
8182 xmlChar *attvalue;
8183 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008184 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008185 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008186 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008187 const xmlChar *base;
8188 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008189 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008190
8191 if (RAW != '<') return(NULL);
8192 NEXT1;
8193
8194 /*
8195 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8196 * point since the attribute values may be stored as pointers to
8197 * the buffer and calling SHRINK would destroy them !
8198 * The Shrinking is only possible once the full set of attribute
8199 * callbacks have been done.
8200 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008201reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008202 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008203 base = ctxt->input->base;
8204 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008205 oldline = ctxt->input->line;
8206 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008207 nbatts = 0;
8208 nratts = 0;
8209 nbdef = 0;
8210 nbNs = 0;
8211 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008212 /* Forget any namespaces added during an earlier parse of this element. */
8213 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008214
8215 localname = xmlParseQName(ctxt, &prefix);
8216 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008217 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8218 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008219 return(NULL);
8220 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008221 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008222
8223 /*
8224 * Now parse the attributes, it ends up with the ending
8225 *
8226 * (S Attribute)* S?
8227 */
8228 SKIP_BLANKS;
8229 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008230 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008231
8232 while ((RAW != '>') &&
8233 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008234 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008235 const xmlChar *q = CUR_PTR;
8236 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008237 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008238
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008239 attname = xmlParseAttribute2(ctxt, prefix, localname,
8240 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008241 if (ctxt->input->base != base) {
8242 if ((attvalue != NULL) && (alloc != 0))
8243 xmlFree(attvalue);
8244 attvalue = NULL;
8245 goto base_changed;
8246 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008247 if ((attname != NULL) && (attvalue != NULL)) {
8248 if (len < 0) len = xmlStrlen(attvalue);
8249 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8251 xmlURIPtr uri;
8252
8253 if (*URL != 0) {
8254 uri = xmlParseURI((const char *) URL);
8255 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008256 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8257 "xmlns: %s not a valid URI\n",
8258 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008259 } else {
8260 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008261 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8262 "xmlns: URI %s is not absolute\n",
8263 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008264 }
8265 xmlFreeURI(uri);
8266 }
8267 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008269 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008271 for (j = 1;j <= nbNs;j++)
8272 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8273 break;
8274 if (j <= nbNs)
8275 xmlErrAttributeDup(ctxt, NULL, attname);
8276 else
8277 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008278 if (alloc != 0) xmlFree(attvalue);
8279 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008280 continue;
8281 }
8282 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008283 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8284 xmlURIPtr uri;
8285
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008286 if (attname == ctxt->str_xml) {
8287 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008288 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8289 "xml namespace prefix mapped to wrong URI\n",
8290 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008291 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008292 /*
8293 * Do not keep a namespace definition node
8294 */
8295 if (alloc != 0) xmlFree(attvalue);
8296 SKIP_BLANKS;
8297 continue;
8298 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008299 uri = xmlParseURI((const char *) URL);
8300 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008301 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8302 "xmlns:%s: '%s' is not a valid URI\n",
8303 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008304 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008305 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008306 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8307 "xmlns:%s: URI %s is not absolute\n",
8308 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008309 }
8310 xmlFreeURI(uri);
8311 }
8312
Daniel Veillard0fb18932003-09-07 09:14:37 +00008313 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008314 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008315 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008316 for (j = 1;j <= nbNs;j++)
8317 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8318 break;
8319 if (j <= nbNs)
8320 xmlErrAttributeDup(ctxt, aprefix, attname);
8321 else
8322 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008323 if (alloc != 0) xmlFree(attvalue);
8324 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008325 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008326 continue;
8327 }
8328
8329 /*
8330 * Add the pair to atts
8331 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008332 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8333 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008334 if (attvalue[len] == 0)
8335 xmlFree(attvalue);
8336 goto failed;
8337 }
8338 maxatts = ctxt->maxatts;
8339 atts = ctxt->atts;
8340 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008341 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008342 atts[nbatts++] = attname;
8343 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008344 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008345 atts[nbatts++] = attvalue;
8346 attvalue += len;
8347 atts[nbatts++] = attvalue;
8348 /*
8349 * tag if some deallocation is needed
8350 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008351 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008352 } else {
8353 if ((attvalue != NULL) && (attvalue[len] == 0))
8354 xmlFree(attvalue);
8355 }
8356
8357failed:
8358
8359 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008360 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008361 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8362 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008363 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8365 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008366 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008367 }
8368 SKIP_BLANKS;
8369 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8370 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008371 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008372 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008373 break;
8374 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008375 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008376 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008377 }
8378
Daniel Veillard0fb18932003-09-07 09:14:37 +00008379 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008380 * The attributes defaulting
8381 */
8382 if (ctxt->attsDefault != NULL) {
8383 xmlDefAttrsPtr defaults;
8384
8385 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8386 if (defaults != NULL) {
8387 for (i = 0;i < defaults->nbAttrs;i++) {
8388 attname = defaults->values[4 * i];
8389 aprefix = defaults->values[4 * i + 1];
8390
8391 /*
8392 * special work for namespaces defaulted defs
8393 */
8394 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8395 /*
8396 * check that it's not a defined namespace
8397 */
8398 for (j = 1;j <= nbNs;j++)
8399 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8400 break;
8401 if (j <= nbNs) continue;
8402
8403 nsname = xmlGetNamespace(ctxt, NULL);
8404 if (nsname != defaults->values[4 * i + 2]) {
8405 if (nsPush(ctxt, NULL,
8406 defaults->values[4 * i + 2]) > 0)
8407 nbNs++;
8408 }
8409 } else if (aprefix == ctxt->str_xmlns) {
8410 /*
8411 * check that it's not a defined namespace
8412 */
8413 for (j = 1;j <= nbNs;j++)
8414 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8415 break;
8416 if (j <= nbNs) continue;
8417
8418 nsname = xmlGetNamespace(ctxt, attname);
8419 if (nsname != defaults->values[2]) {
8420 if (nsPush(ctxt, attname,
8421 defaults->values[4 * i + 2]) > 0)
8422 nbNs++;
8423 }
8424 } else {
8425 /*
8426 * check that it's not a defined attribute
8427 */
8428 for (j = 0;j < nbatts;j+=5) {
8429 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8430 break;
8431 }
8432 if (j < nbatts) continue;
8433
8434 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8435 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008436 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008437 }
8438 maxatts = ctxt->maxatts;
8439 atts = ctxt->atts;
8440 }
8441 atts[nbatts++] = attname;
8442 atts[nbatts++] = aprefix;
8443 if (aprefix == NULL)
8444 atts[nbatts++] = NULL;
8445 else
8446 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8447 atts[nbatts++] = defaults->values[4 * i + 2];
8448 atts[nbatts++] = defaults->values[4 * i + 3];
8449 nbdef++;
8450 }
8451 }
8452 }
8453 }
8454
Daniel Veillarde70c8772003-11-25 07:21:18 +00008455 /*
8456 * The attributes checkings
8457 */
8458 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008459 /*
8460 * The default namespace does not apply to attribute names.
8461 */
8462 if (atts[i + 1] != NULL) {
8463 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8464 if (nsname == NULL) {
8465 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8466 "Namespace prefix %s for %s on %s is not defined\n",
8467 atts[i + 1], atts[i], localname);
8468 }
8469 atts[i + 2] = nsname;
8470 } else
8471 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008472 /*
8473 * [ WFC: Unique Att Spec ]
8474 * No attribute name may appear more than once in the same
8475 * start-tag or empty-element tag.
8476 * As extended by the Namespace in XML REC.
8477 */
8478 for (j = 0; j < i;j += 5) {
8479 if (atts[i] == atts[j]) {
8480 if (atts[i+1] == atts[j+1]) {
8481 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8482 break;
8483 }
8484 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8485 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8486 "Namespaced Attribute %s in '%s' redefined\n",
8487 atts[i], nsname, NULL);
8488 break;
8489 }
8490 }
8491 }
8492 }
8493
Daniel Veillarde57ec792003-09-10 10:50:59 +00008494 nsname = xmlGetNamespace(ctxt, prefix);
8495 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008496 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8497 "Namespace prefix %s on %s is not defined\n",
8498 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008499 }
8500 *pref = prefix;
8501 *URI = nsname;
8502
8503 /*
8504 * SAX: Start of Element !
8505 */
8506 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8507 (!ctxt->disableSAX)) {
8508 if (nbNs > 0)
8509 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8510 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8511 nbatts / 5, nbdef, atts);
8512 else
8513 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8514 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8515 }
8516
8517 /*
8518 * Free up attribute allocated strings if needed
8519 */
8520 if (attval != 0) {
8521 for (i = 3,j = 0; j < nratts;i += 5,j++)
8522 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8523 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008524 }
8525
8526 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008527
8528base_changed:
8529 /*
8530 * the attribute strings are valid iif the base didn't changed
8531 */
8532 if (attval != 0) {
8533 for (i = 3,j = 0; j < nratts;i += 5,j++)
8534 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8535 xmlFree((xmlChar *) atts[i]);
8536 }
8537 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008538 ctxt->input->line = oldline;
8539 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008540 if (ctxt->wellFormed == 1) {
8541 goto reparse;
8542 }
8543 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544}
8545
8546/**
8547 * xmlParseEndTag2:
8548 * @ctxt: an XML parser context
8549 * @line: line of the start tag
8550 * @nsNr: number of namespaces on the start tag
8551 *
8552 * parse an end of tag
8553 *
8554 * [42] ETag ::= '</' Name S? '>'
8555 *
8556 * With namespace
8557 *
8558 * [NS 9] ETag ::= '</' QName S? '>'
8559 */
8560
8561static void
8562xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008563 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 const xmlChar *name;
8565
8566 GROW;
8567 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008568 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569 return;
8570 }
8571 SKIP(2);
8572
William M. Brack13dfa872004-09-18 04:52:08 +00008573 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008574 if (ctxt->input->cur[tlen] == '>') {
8575 ctxt->input->cur += tlen + 1;
8576 goto done;
8577 }
8578 ctxt->input->cur += tlen;
8579 name = (xmlChar*)1;
8580 } else {
8581 if (prefix == NULL)
8582 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8583 else
8584 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8585 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008586
8587 /*
8588 * We should definitely be at the ending "S? '>'" part
8589 */
8590 GROW;
8591 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008592 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008593 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008594 } else
8595 NEXT1;
8596
8597 /*
8598 * [ WFC: Element Type Match ]
8599 * The Name in an element's end-tag must match the element type in the
8600 * start-tag.
8601 *
8602 */
8603 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008604 if (name == NULL) name = BAD_CAST "unparseable";
8605 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008606 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008607 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008608 }
8609
8610 /*
8611 * SAX: End of Tag
8612 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008613done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008614 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8615 (!ctxt->disableSAX))
8616 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8617
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618 spacePop(ctxt);
8619 if (nsNr != 0)
8620 nsPop(ctxt, nsNr);
8621 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008622}
8623
8624/**
Owen Taylor3473f882001-02-23 17:55:21 +00008625 * xmlParseCDSect:
8626 * @ctxt: an XML parser context
8627 *
8628 * Parse escaped pure raw content.
8629 *
8630 * [18] CDSect ::= CDStart CData CDEnd
8631 *
8632 * [19] CDStart ::= '<![CDATA['
8633 *
8634 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8635 *
8636 * [21] CDEnd ::= ']]>'
8637 */
8638void
8639xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8640 xmlChar *buf = NULL;
8641 int len = 0;
8642 int size = XML_PARSER_BUFFER_SIZE;
8643 int r, rl;
8644 int s, sl;
8645 int cur, l;
8646 int count = 0;
8647
Daniel Veillard8f597c32003-10-06 08:19:27 +00008648 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008649 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008650 SKIP(9);
8651 } else
8652 return;
8653
8654 ctxt->instate = XML_PARSER_CDATA_SECTION;
8655 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008656 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008657 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008658 ctxt->instate = XML_PARSER_CONTENT;
8659 return;
8660 }
8661 NEXTL(rl);
8662 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008663 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008664 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008665 ctxt->instate = XML_PARSER_CONTENT;
8666 return;
8667 }
8668 NEXTL(sl);
8669 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008670 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008671 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008672 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008673 return;
8674 }
William M. Brack871611b2003-10-18 04:53:14 +00008675 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008676 ((r != ']') || (s != ']') || (cur != '>'))) {
8677 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008678 xmlChar *tmp;
8679
Owen Taylor3473f882001-02-23 17:55:21 +00008680 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008681 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8682 if (tmp == NULL) {
8683 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008684 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008685 return;
8686 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008687 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008688 }
8689 COPY_BUF(rl,buf,len,r);
8690 r = s;
8691 rl = sl;
8692 s = cur;
8693 sl = l;
8694 count++;
8695 if (count > 50) {
8696 GROW;
8697 count = 0;
8698 }
8699 NEXTL(l);
8700 cur = CUR_CHAR(l);
8701 }
8702 buf[len] = 0;
8703 ctxt->instate = XML_PARSER_CONTENT;
8704 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008705 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008706 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008707 xmlFree(buf);
8708 return;
8709 }
8710 NEXTL(l);
8711
8712 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008713 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008714 */
8715 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8716 if (ctxt->sax->cdataBlock != NULL)
8717 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008718 else if (ctxt->sax->characters != NULL)
8719 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008720 }
8721 xmlFree(buf);
8722}
8723
8724/**
8725 * xmlParseContent:
8726 * @ctxt: an XML parser context
8727 *
8728 * Parse a content:
8729 *
8730 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8731 */
8732
8733void
8734xmlParseContent(xmlParserCtxtPtr ctxt) {
8735 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008736 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008737 ((RAW != '<') || (NXT(1) != '/')) &&
8738 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008739 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008740 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008741 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008742
8743 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008744 * First case : a Processing Instruction.
8745 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008746 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008747 xmlParsePI(ctxt);
8748 }
8749
8750 /*
8751 * Second case : a CDSection
8752 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008753 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008754 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008755 xmlParseCDSect(ctxt);
8756 }
8757
8758 /*
8759 * Third case : a comment
8760 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008761 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008762 (NXT(2) == '-') && (NXT(3) == '-')) {
8763 xmlParseComment(ctxt);
8764 ctxt->instate = XML_PARSER_CONTENT;
8765 }
8766
8767 /*
8768 * Fourth case : a sub-element.
8769 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008770 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008771 xmlParseElement(ctxt);
8772 }
8773
8774 /*
8775 * Fifth case : a reference. If if has not been resolved,
8776 * parsing returns it's Name, create the node
8777 */
8778
Daniel Veillard21a0f912001-02-25 19:54:14 +00008779 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008780 xmlParseReference(ctxt);
8781 }
8782
8783 /*
8784 * Last case, text. Note that References are handled directly.
8785 */
8786 else {
8787 xmlParseCharData(ctxt, 0);
8788 }
8789
8790 GROW;
8791 /*
8792 * Pop-up of finished entities.
8793 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008794 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008795 xmlPopInput(ctxt);
8796 SHRINK;
8797
Daniel Veillardfdc91562002-07-01 21:52:03 +00008798 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008799 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8800 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008801 ctxt->instate = XML_PARSER_EOF;
8802 break;
8803 }
8804 }
8805}
8806
8807/**
8808 * xmlParseElement:
8809 * @ctxt: an XML parser context
8810 *
8811 * parse an XML element, this is highly recursive
8812 *
8813 * [39] element ::= EmptyElemTag | STag content ETag
8814 *
8815 * [ WFC: Element Type Match ]
8816 * The Name in an element's end-tag must match the element type in the
8817 * start-tag.
8818 *
Owen Taylor3473f882001-02-23 17:55:21 +00008819 */
8820
8821void
8822xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008823 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008824 const xmlChar *prefix;
8825 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008826 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008827 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008828 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008829 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008830
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008831 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8832 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8833 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8834 xmlParserMaxDepth);
8835 ctxt->instate = XML_PARSER_EOF;
8836 return;
8837 }
8838
Owen Taylor3473f882001-02-23 17:55:21 +00008839 /* Capture start position */
8840 if (ctxt->record_info) {
8841 node_info.begin_pos = ctxt->input->consumed +
8842 (CUR_PTR - ctxt->input->base);
8843 node_info.begin_line = ctxt->input->line;
8844 }
8845
8846 if (ctxt->spaceNr == 0)
8847 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008848 else if (*ctxt->space == -2)
8849 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008850 else
8851 spacePush(ctxt, *ctxt->space);
8852
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008853 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008854#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008856#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008857 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008858#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 else
8860 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008861#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008862 if (name == NULL) {
8863 spacePop(ctxt);
8864 return;
8865 }
8866 namePush(ctxt, name);
8867 ret = ctxt->node;
8868
Daniel Veillard4432df22003-09-28 18:58:27 +00008869#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008870 /*
8871 * [ VC: Root Element Type ]
8872 * The Name in the document type declaration must match the element
8873 * type of the root element.
8874 */
8875 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8876 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8877 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008878#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008879
8880 /*
8881 * Check for an Empty Element.
8882 */
8883 if ((RAW == '/') && (NXT(1) == '>')) {
8884 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008885 if (ctxt->sax2) {
8886 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8887 (!ctxt->disableSAX))
8888 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008889#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890 } else {
8891 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8892 (!ctxt->disableSAX))
8893 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008894#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008895 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008896 namePop(ctxt);
8897 spacePop(ctxt);
8898 if (nsNr != ctxt->nsNr)
8899 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008900 if ( ret != NULL && ctxt->record_info ) {
8901 node_info.end_pos = ctxt->input->consumed +
8902 (CUR_PTR - ctxt->input->base);
8903 node_info.end_line = ctxt->input->line;
8904 node_info.node = ret;
8905 xmlParserAddNodeInfo(ctxt, &node_info);
8906 }
8907 return;
8908 }
8909 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008910 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008911 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008912 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8913 "Couldn't find end of Start Tag %s line %d\n",
8914 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008915
8916 /*
8917 * end of parsing of this node.
8918 */
8919 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008920 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008921 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008922 if (nsNr != ctxt->nsNr)
8923 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008924
8925 /*
8926 * Capture end position and add node
8927 */
8928 if ( ret != NULL && ctxt->record_info ) {
8929 node_info.end_pos = ctxt->input->consumed +
8930 (CUR_PTR - ctxt->input->base);
8931 node_info.end_line = ctxt->input->line;
8932 node_info.node = ret;
8933 xmlParserAddNodeInfo(ctxt, &node_info);
8934 }
8935 return;
8936 }
8937
8938 /*
8939 * Parse the content of the element:
8940 */
8941 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008942 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008943 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008944 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008945 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008946
8947 /*
8948 * end of parsing of this node.
8949 */
8950 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008951 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008952 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008953 if (nsNr != ctxt->nsNr)
8954 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008955 return;
8956 }
8957
8958 /*
8959 * parse the end of tag: '</' should be here.
8960 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008961 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008962 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008963 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008964 }
8965#ifdef LIBXML_SAX1_ENABLED
8966 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008967 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008968#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008969
8970 /*
8971 * Capture end position and add node
8972 */
8973 if ( ret != NULL && ctxt->record_info ) {
8974 node_info.end_pos = ctxt->input->consumed +
8975 (CUR_PTR - ctxt->input->base);
8976 node_info.end_line = ctxt->input->line;
8977 node_info.node = ret;
8978 xmlParserAddNodeInfo(ctxt, &node_info);
8979 }
8980}
8981
8982/**
8983 * xmlParseVersionNum:
8984 * @ctxt: an XML parser context
8985 *
8986 * parse the XML version value.
8987 *
8988 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8989 *
8990 * Returns the string giving the XML version number, or NULL
8991 */
8992xmlChar *
8993xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8994 xmlChar *buf = NULL;
8995 int len = 0;
8996 int size = 10;
8997 xmlChar cur;
8998
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008999 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009000 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009001 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009002 return(NULL);
9003 }
9004 cur = CUR;
9005 while (((cur >= 'a') && (cur <= 'z')) ||
9006 ((cur >= 'A') && (cur <= 'Z')) ||
9007 ((cur >= '0') && (cur <= '9')) ||
9008 (cur == '_') || (cur == '.') ||
9009 (cur == ':') || (cur == '-')) {
9010 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009011 xmlChar *tmp;
9012
Owen Taylor3473f882001-02-23 17:55:21 +00009013 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009014 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9015 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009016 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009017 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009018 return(NULL);
9019 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009020 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009021 }
9022 buf[len++] = cur;
9023 NEXT;
9024 cur=CUR;
9025 }
9026 buf[len] = 0;
9027 return(buf);
9028}
9029
9030/**
9031 * xmlParseVersionInfo:
9032 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009033 *
Owen Taylor3473f882001-02-23 17:55:21 +00009034 * parse the XML version.
9035 *
9036 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009037 *
Owen Taylor3473f882001-02-23 17:55:21 +00009038 * [25] Eq ::= S? '=' S?
9039 *
9040 * Returns the version string, e.g. "1.0"
9041 */
9042
9043xmlChar *
9044xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9045 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009046
Daniel Veillarda07050d2003-10-19 14:46:32 +00009047 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009048 SKIP(7);
9049 SKIP_BLANKS;
9050 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009051 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009052 return(NULL);
9053 }
9054 NEXT;
9055 SKIP_BLANKS;
9056 if (RAW == '"') {
9057 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009058 version = xmlParseVersionNum(ctxt);
9059 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009060 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009061 } else
9062 NEXT;
9063 } else if (RAW == '\''){
9064 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009065 version = xmlParseVersionNum(ctxt);
9066 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009067 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009068 } else
9069 NEXT;
9070 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009071 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009072 }
9073 }
9074 return(version);
9075}
9076
9077/**
9078 * xmlParseEncName:
9079 * @ctxt: an XML parser context
9080 *
9081 * parse the XML encoding name
9082 *
9083 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9084 *
9085 * Returns the encoding name value or NULL
9086 */
9087xmlChar *
9088xmlParseEncName(xmlParserCtxtPtr ctxt) {
9089 xmlChar *buf = NULL;
9090 int len = 0;
9091 int size = 10;
9092 xmlChar cur;
9093
9094 cur = CUR;
9095 if (((cur >= 'a') && (cur <= 'z')) ||
9096 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009097 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009098 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009099 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009100 return(NULL);
9101 }
9102
9103 buf[len++] = cur;
9104 NEXT;
9105 cur = CUR;
9106 while (((cur >= 'a') && (cur <= 'z')) ||
9107 ((cur >= 'A') && (cur <= 'Z')) ||
9108 ((cur >= '0') && (cur <= '9')) ||
9109 (cur == '.') || (cur == '_') ||
9110 (cur == '-')) {
9111 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009112 xmlChar *tmp;
9113
Owen Taylor3473f882001-02-23 17:55:21 +00009114 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009115 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9116 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009117 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009118 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009119 return(NULL);
9120 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009121 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009122 }
9123 buf[len++] = cur;
9124 NEXT;
9125 cur = CUR;
9126 if (cur == 0) {
9127 SHRINK;
9128 GROW;
9129 cur = CUR;
9130 }
9131 }
9132 buf[len] = 0;
9133 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009134 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009135 }
9136 return(buf);
9137}
9138
9139/**
9140 * xmlParseEncodingDecl:
9141 * @ctxt: an XML parser context
9142 *
9143 * parse the XML encoding declaration
9144 *
9145 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9146 *
9147 * this setups the conversion filters.
9148 *
9149 * Returns the encoding value or NULL
9150 */
9151
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009152const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009153xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9154 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009155
9156 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009157 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009158 SKIP(8);
9159 SKIP_BLANKS;
9160 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009161 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009162 return(NULL);
9163 }
9164 NEXT;
9165 SKIP_BLANKS;
9166 if (RAW == '"') {
9167 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009168 encoding = xmlParseEncName(ctxt);
9169 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009170 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009171 } else
9172 NEXT;
9173 } else if (RAW == '\''){
9174 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009175 encoding = xmlParseEncName(ctxt);
9176 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009177 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009178 } else
9179 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009180 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009181 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009182 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009183 /*
9184 * UTF-16 encoding stwich has already taken place at this stage,
9185 * more over the little-endian/big-endian selection is already done
9186 */
9187 if ((encoding != NULL) &&
9188 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9189 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009190 if (ctxt->encoding != NULL)
9191 xmlFree((xmlChar *) ctxt->encoding);
9192 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009193 }
9194 /*
9195 * UTF-8 encoding is handled natively
9196 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009197 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009198 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9199 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009200 if (ctxt->encoding != NULL)
9201 xmlFree((xmlChar *) ctxt->encoding);
9202 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009203 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009204 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009205 xmlCharEncodingHandlerPtr handler;
9206
9207 if (ctxt->input->encoding != NULL)
9208 xmlFree((xmlChar *) ctxt->input->encoding);
9209 ctxt->input->encoding = encoding;
9210
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009211 handler = xmlFindCharEncodingHandler((const char *) encoding);
9212 if (handler != NULL) {
9213 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009214 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009215 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009216 "Unsupported encoding %s\n", encoding);
9217 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009218 }
9219 }
9220 }
9221 return(encoding);
9222}
9223
9224/**
9225 * xmlParseSDDecl:
9226 * @ctxt: an XML parser context
9227 *
9228 * parse the XML standalone declaration
9229 *
9230 * [32] SDDecl ::= S 'standalone' Eq
9231 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9232 *
9233 * [ VC: Standalone Document Declaration ]
9234 * TODO The standalone document declaration must have the value "no"
9235 * if any external markup declarations contain declarations of:
9236 * - attributes with default values, if elements to which these
9237 * attributes apply appear in the document without specifications
9238 * of values for these attributes, or
9239 * - entities (other than amp, lt, gt, apos, quot), if references
9240 * to those entities appear in the document, or
9241 * - attributes with values subject to normalization, where the
9242 * attribute appears in the document with a value which will change
9243 * as a result of normalization, or
9244 * - element types with element content, if white space occurs directly
9245 * within any instance of those types.
9246 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009247 * Returns:
9248 * 1 if standalone="yes"
9249 * 0 if standalone="no"
9250 * -2 if standalone attribute is missing or invalid
9251 * (A standalone value of -2 means that the XML declaration was found,
9252 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009253 */
9254
9255int
9256xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009257 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009258
9259 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009260 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009261 SKIP(10);
9262 SKIP_BLANKS;
9263 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009264 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009265 return(standalone);
9266 }
9267 NEXT;
9268 SKIP_BLANKS;
9269 if (RAW == '\''){
9270 NEXT;
9271 if ((RAW == 'n') && (NXT(1) == 'o')) {
9272 standalone = 0;
9273 SKIP(2);
9274 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9275 (NXT(2) == 's')) {
9276 standalone = 1;
9277 SKIP(3);
9278 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009279 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009280 }
9281 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009282 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009283 } else
9284 NEXT;
9285 } else if (RAW == '"'){
9286 NEXT;
9287 if ((RAW == 'n') && (NXT(1) == 'o')) {
9288 standalone = 0;
9289 SKIP(2);
9290 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9291 (NXT(2) == 's')) {
9292 standalone = 1;
9293 SKIP(3);
9294 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009295 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009296 }
9297 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009298 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009299 } else
9300 NEXT;
9301 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009302 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009303 }
9304 }
9305 return(standalone);
9306}
9307
9308/**
9309 * xmlParseXMLDecl:
9310 * @ctxt: an XML parser context
9311 *
9312 * parse an XML declaration header
9313 *
9314 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9315 */
9316
9317void
9318xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9319 xmlChar *version;
9320
9321 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009322 * This value for standalone indicates that the document has an
9323 * XML declaration but it does not have a standalone attribute.
9324 * It will be overwritten later if a standalone attribute is found.
9325 */
9326 ctxt->input->standalone = -2;
9327
9328 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009329 * We know that '<?xml' is here.
9330 */
9331 SKIP(5);
9332
William M. Brack76e95df2003-10-18 16:20:14 +00009333 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9335 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009336 }
9337 SKIP_BLANKS;
9338
9339 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009340 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009341 */
9342 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009343 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009344 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009345 } else {
9346 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9347 /*
9348 * TODO: Blueberry should be detected here
9349 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009350 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9351 "Unsupported version '%s'\n",
9352 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009353 }
9354 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009355 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009356 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009357 }
Owen Taylor3473f882001-02-23 17:55:21 +00009358
9359 /*
9360 * We may have the encoding declaration
9361 */
William M. Brack76e95df2003-10-18 16:20:14 +00009362 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009363 if ((RAW == '?') && (NXT(1) == '>')) {
9364 SKIP(2);
9365 return;
9366 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009368 }
9369 xmlParseEncodingDecl(ctxt);
9370 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9371 /*
9372 * The XML REC instructs us to stop parsing right here
9373 */
9374 return;
9375 }
9376
9377 /*
9378 * We may have the standalone status.
9379 */
William M. Brack76e95df2003-10-18 16:20:14 +00009380 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009381 if ((RAW == '?') && (NXT(1) == '>')) {
9382 SKIP(2);
9383 return;
9384 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009386 }
9387 SKIP_BLANKS;
9388 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9389
9390 SKIP_BLANKS;
9391 if ((RAW == '?') && (NXT(1) == '>')) {
9392 SKIP(2);
9393 } else if (RAW == '>') {
9394 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009395 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009396 NEXT;
9397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009398 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009399 MOVETO_ENDTAG(CUR_PTR);
9400 NEXT;
9401 }
9402}
9403
9404/**
9405 * xmlParseMisc:
9406 * @ctxt: an XML parser context
9407 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009408 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009409 *
9410 * [27] Misc ::= Comment | PI | S
9411 */
9412
9413void
9414xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009415 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009416 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009417 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009418 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009419 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009420 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009421 NEXT;
9422 } else
9423 xmlParseComment(ctxt);
9424 }
9425}
9426
9427/**
9428 * xmlParseDocument:
9429 * @ctxt: an XML parser context
9430 *
9431 * parse an XML document (and build a tree if using the standard SAX
9432 * interface).
9433 *
9434 * [1] document ::= prolog element Misc*
9435 *
9436 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9437 *
9438 * Returns 0, -1 in case of error. the parser context is augmented
9439 * as a result of the parsing.
9440 */
9441
9442int
9443xmlParseDocument(xmlParserCtxtPtr ctxt) {
9444 xmlChar start[4];
9445 xmlCharEncoding enc;
9446
9447 xmlInitParser();
9448
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009449 if ((ctxt == NULL) || (ctxt->input == NULL))
9450 return(-1);
9451
Owen Taylor3473f882001-02-23 17:55:21 +00009452 GROW;
9453
9454 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009455 * SAX: detecting the level.
9456 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009457 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009458
9459 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009460 * SAX: beginning of the document processing.
9461 */
9462 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9463 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9464
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009465 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9466 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009467 /*
9468 * Get the 4 first bytes and decode the charset
9469 * if enc != XML_CHAR_ENCODING_NONE
9470 * plug some encoding conversion routines.
9471 */
9472 start[0] = RAW;
9473 start[1] = NXT(1);
9474 start[2] = NXT(2);
9475 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009476 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009477 if (enc != XML_CHAR_ENCODING_NONE) {
9478 xmlSwitchEncoding(ctxt, enc);
9479 }
Owen Taylor3473f882001-02-23 17:55:21 +00009480 }
9481
9482
9483 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009484 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009485 }
9486
9487 /*
9488 * Check for the XMLDecl in the Prolog.
9489 */
9490 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009491 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009492
9493 /*
9494 * Note that we will switch encoding on the fly.
9495 */
9496 xmlParseXMLDecl(ctxt);
9497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9498 /*
9499 * The XML REC instructs us to stop parsing right here
9500 */
9501 return(-1);
9502 }
9503 ctxt->standalone = ctxt->input->standalone;
9504 SKIP_BLANKS;
9505 } else {
9506 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9507 }
9508 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9509 ctxt->sax->startDocument(ctxt->userData);
9510
9511 /*
9512 * The Misc part of the Prolog
9513 */
9514 GROW;
9515 xmlParseMisc(ctxt);
9516
9517 /*
9518 * Then possibly doc type declaration(s) and more Misc
9519 * (doctypedecl Misc*)?
9520 */
9521 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009522 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009523
9524 ctxt->inSubset = 1;
9525 xmlParseDocTypeDecl(ctxt);
9526 if (RAW == '[') {
9527 ctxt->instate = XML_PARSER_DTD;
9528 xmlParseInternalSubset(ctxt);
9529 }
9530
9531 /*
9532 * Create and update the external subset.
9533 */
9534 ctxt->inSubset = 2;
9535 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9536 (!ctxt->disableSAX))
9537 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9538 ctxt->extSubSystem, ctxt->extSubURI);
9539 ctxt->inSubset = 0;
9540
Daniel Veillardac4118d2008-01-11 05:27:32 +00009541 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009542
9543 ctxt->instate = XML_PARSER_PROLOG;
9544 xmlParseMisc(ctxt);
9545 }
9546
9547 /*
9548 * Time to start parsing the tree itself
9549 */
9550 GROW;
9551 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009552 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9553 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009554 } else {
9555 ctxt->instate = XML_PARSER_CONTENT;
9556 xmlParseElement(ctxt);
9557 ctxt->instate = XML_PARSER_EPILOG;
9558
9559
9560 /*
9561 * The Misc part at the end
9562 */
9563 xmlParseMisc(ctxt);
9564
Daniel Veillard561b7f82002-03-20 21:55:57 +00009565 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009566 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009567 }
9568 ctxt->instate = XML_PARSER_EOF;
9569 }
9570
9571 /*
9572 * SAX: end of the document processing.
9573 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009574 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009575 ctxt->sax->endDocument(ctxt->userData);
9576
Daniel Veillard5997aca2002-03-18 18:36:20 +00009577 /*
9578 * Remove locally kept entity definitions if the tree was not built
9579 */
9580 if ((ctxt->myDoc != NULL) &&
9581 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9582 xmlFreeDoc(ctxt->myDoc);
9583 ctxt->myDoc = NULL;
9584 }
9585
Daniel Veillardc7612992002-02-17 22:47:37 +00009586 if (! ctxt->wellFormed) {
9587 ctxt->valid = 0;
9588 return(-1);
9589 }
Owen Taylor3473f882001-02-23 17:55:21 +00009590 return(0);
9591}
9592
9593/**
9594 * xmlParseExtParsedEnt:
9595 * @ctxt: an XML parser context
9596 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009597 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009598 * An external general parsed entity is well-formed if it matches the
9599 * production labeled extParsedEnt.
9600 *
9601 * [78] extParsedEnt ::= TextDecl? content
9602 *
9603 * Returns 0, -1 in case of error. the parser context is augmented
9604 * as a result of the parsing.
9605 */
9606
9607int
9608xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9609 xmlChar start[4];
9610 xmlCharEncoding enc;
9611
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009612 if ((ctxt == NULL) || (ctxt->input == NULL))
9613 return(-1);
9614
Owen Taylor3473f882001-02-23 17:55:21 +00009615 xmlDefaultSAXHandlerInit();
9616
Daniel Veillard309f81d2003-09-23 09:02:53 +00009617 xmlDetectSAX2(ctxt);
9618
Owen Taylor3473f882001-02-23 17:55:21 +00009619 GROW;
9620
9621 /*
9622 * SAX: beginning of the document processing.
9623 */
9624 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9625 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9626
9627 /*
9628 * Get the 4 first bytes and decode the charset
9629 * if enc != XML_CHAR_ENCODING_NONE
9630 * plug some encoding conversion routines.
9631 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009632 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9633 start[0] = RAW;
9634 start[1] = NXT(1);
9635 start[2] = NXT(2);
9636 start[3] = NXT(3);
9637 enc = xmlDetectCharEncoding(start, 4);
9638 if (enc != XML_CHAR_ENCODING_NONE) {
9639 xmlSwitchEncoding(ctxt, enc);
9640 }
Owen Taylor3473f882001-02-23 17:55:21 +00009641 }
9642
9643
9644 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009645 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009646 }
9647
9648 /*
9649 * Check for the XMLDecl in the Prolog.
9650 */
9651 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009652 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009653
9654 /*
9655 * Note that we will switch encoding on the fly.
9656 */
9657 xmlParseXMLDecl(ctxt);
9658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9659 /*
9660 * The XML REC instructs us to stop parsing right here
9661 */
9662 return(-1);
9663 }
9664 SKIP_BLANKS;
9665 } else {
9666 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9667 }
9668 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9669 ctxt->sax->startDocument(ctxt->userData);
9670
9671 /*
9672 * Doing validity checking on chunk doesn't make sense
9673 */
9674 ctxt->instate = XML_PARSER_CONTENT;
9675 ctxt->validate = 0;
9676 ctxt->loadsubset = 0;
9677 ctxt->depth = 0;
9678
9679 xmlParseContent(ctxt);
9680
9681 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009682 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009683 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009684 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009685 }
9686
9687 /*
9688 * SAX: end of the document processing.
9689 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009690 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009691 ctxt->sax->endDocument(ctxt->userData);
9692
9693 if (! ctxt->wellFormed) return(-1);
9694 return(0);
9695}
9696
Daniel Veillard73b013f2003-09-30 12:36:01 +00009697#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009698/************************************************************************
9699 * *
9700 * Progressive parsing interfaces *
9701 * *
9702 ************************************************************************/
9703
9704/**
9705 * xmlParseLookupSequence:
9706 * @ctxt: an XML parser context
9707 * @first: the first char to lookup
9708 * @next: the next char to lookup or zero
9709 * @third: the next char to lookup or zero
9710 *
9711 * Try to find if a sequence (first, next, third) or just (first next) or
9712 * (first) is available in the input stream.
9713 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9714 * to avoid rescanning sequences of bytes, it DOES change the state of the
9715 * parser, do not use liberally.
9716 *
9717 * Returns the index to the current parsing point if the full sequence
9718 * is available, -1 otherwise.
9719 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009720static int
Owen Taylor3473f882001-02-23 17:55:21 +00009721xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9722 xmlChar next, xmlChar third) {
9723 int base, len;
9724 xmlParserInputPtr in;
9725 const xmlChar *buf;
9726
9727 in = ctxt->input;
9728 if (in == NULL) return(-1);
9729 base = in->cur - in->base;
9730 if (base < 0) return(-1);
9731 if (ctxt->checkIndex > base)
9732 base = ctxt->checkIndex;
9733 if (in->buf == NULL) {
9734 buf = in->base;
9735 len = in->length;
9736 } else {
9737 buf = in->buf->buffer->content;
9738 len = in->buf->buffer->use;
9739 }
9740 /* take into account the sequence length */
9741 if (third) len -= 2;
9742 else if (next) len --;
9743 for (;base < len;base++) {
9744 if (buf[base] == first) {
9745 if (third != 0) {
9746 if ((buf[base + 1] != next) ||
9747 (buf[base + 2] != third)) continue;
9748 } else if (next != 0) {
9749 if (buf[base + 1] != next) continue;
9750 }
9751 ctxt->checkIndex = 0;
9752#ifdef DEBUG_PUSH
9753 if (next == 0)
9754 xmlGenericError(xmlGenericErrorContext,
9755 "PP: lookup '%c' found at %d\n",
9756 first, base);
9757 else if (third == 0)
9758 xmlGenericError(xmlGenericErrorContext,
9759 "PP: lookup '%c%c' found at %d\n",
9760 first, next, base);
9761 else
9762 xmlGenericError(xmlGenericErrorContext,
9763 "PP: lookup '%c%c%c' found at %d\n",
9764 first, next, third, base);
9765#endif
9766 return(base - (in->cur - in->base));
9767 }
9768 }
9769 ctxt->checkIndex = base;
9770#ifdef DEBUG_PUSH
9771 if (next == 0)
9772 xmlGenericError(xmlGenericErrorContext,
9773 "PP: lookup '%c' failed\n", first);
9774 else if (third == 0)
9775 xmlGenericError(xmlGenericErrorContext,
9776 "PP: lookup '%c%c' failed\n", first, next);
9777 else
9778 xmlGenericError(xmlGenericErrorContext,
9779 "PP: lookup '%c%c%c' failed\n", first, next, third);
9780#endif
9781 return(-1);
9782}
9783
9784/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009785 * xmlParseGetLasts:
9786 * @ctxt: an XML parser context
9787 * @lastlt: pointer to store the last '<' from the input
9788 * @lastgt: pointer to store the last '>' from the input
9789 *
9790 * Lookup the last < and > in the current chunk
9791 */
9792static void
9793xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9794 const xmlChar **lastgt) {
9795 const xmlChar *tmp;
9796
9797 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9798 xmlGenericError(xmlGenericErrorContext,
9799 "Internal error: xmlParseGetLasts\n");
9800 return;
9801 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009802 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009803 tmp = ctxt->input->end;
9804 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009805 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 if (tmp < ctxt->input->base) {
9807 *lastlt = NULL;
9808 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009810 *lastlt = tmp;
9811 tmp++;
9812 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9813 if (*tmp == '\'') {
9814 tmp++;
9815 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9816 if (tmp < ctxt->input->end) tmp++;
9817 } else if (*tmp == '"') {
9818 tmp++;
9819 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9820 if (tmp < ctxt->input->end) tmp++;
9821 } else
9822 tmp++;
9823 }
9824 if (tmp < ctxt->input->end)
9825 *lastgt = tmp;
9826 else {
9827 tmp = *lastlt;
9828 tmp--;
9829 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9830 if (tmp >= ctxt->input->base)
9831 *lastgt = tmp;
9832 else
9833 *lastgt = NULL;
9834 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009835 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009836 } else {
9837 *lastlt = NULL;
9838 *lastgt = NULL;
9839 }
9840}
9841/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009842 * xmlCheckCdataPush:
9843 * @cur: pointer to the bock of characters
9844 * @len: length of the block in bytes
9845 *
9846 * Check that the block of characters is okay as SCdata content [20]
9847 *
9848 * Returns the number of bytes to pass if okay, a negative index where an
9849 * UTF-8 error occured otherwise
9850 */
9851static int
9852xmlCheckCdataPush(const xmlChar *utf, int len) {
9853 int ix;
9854 unsigned char c;
9855 int codepoint;
9856
9857 if ((utf == NULL) || (len <= 0))
9858 return(0);
9859
9860 for (ix = 0; ix < len;) { /* string is 0-terminated */
9861 c = utf[ix];
9862 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9863 if (c >= 0x20)
9864 ix++;
9865 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9866 ix++;
9867 else
9868 return(-ix);
9869 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9870 if (ix + 2 > len) return(ix);
9871 if ((utf[ix+1] & 0xc0 ) != 0x80)
9872 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009873 codepoint = (utf[ix] & 0x1f) << 6;
9874 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009875 if (!xmlIsCharQ(codepoint))
9876 return(-ix);
9877 ix += 2;
9878 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9879 if (ix + 3 > len) return(ix);
9880 if (((utf[ix+1] & 0xc0) != 0x80) ||
9881 ((utf[ix+2] & 0xc0) != 0x80))
9882 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009883 codepoint = (utf[ix] & 0xf) << 12;
9884 codepoint |= (utf[ix+1] & 0x3f) << 6;
9885 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009886 if (!xmlIsCharQ(codepoint))
9887 return(-ix);
9888 ix += 3;
9889 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9890 if (ix + 4 > len) return(ix);
9891 if (((utf[ix+1] & 0xc0) != 0x80) ||
9892 ((utf[ix+2] & 0xc0) != 0x80) ||
9893 ((utf[ix+3] & 0xc0) != 0x80))
9894 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009895 codepoint = (utf[ix] & 0x7) << 18;
9896 codepoint |= (utf[ix+1] & 0x3f) << 12;
9897 codepoint |= (utf[ix+2] & 0x3f) << 6;
9898 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009899 if (!xmlIsCharQ(codepoint))
9900 return(-ix);
9901 ix += 4;
9902 } else /* unknown encoding */
9903 return(-ix);
9904 }
9905 return(ix);
9906}
9907
9908/**
Owen Taylor3473f882001-02-23 17:55:21 +00009909 * xmlParseTryOrFinish:
9910 * @ctxt: an XML parser context
9911 * @terminate: last chunk indicator
9912 *
9913 * Try to progress on parsing
9914 *
9915 * Returns zero if no parsing was possible
9916 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009917static int
Owen Taylor3473f882001-02-23 17:55:21 +00009918xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9919 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009920 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009921 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009922 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009923
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009924 if (ctxt->input == NULL)
9925 return(0);
9926
Owen Taylor3473f882001-02-23 17:55:21 +00009927#ifdef DEBUG_PUSH
9928 switch (ctxt->instate) {
9929 case XML_PARSER_EOF:
9930 xmlGenericError(xmlGenericErrorContext,
9931 "PP: try EOF\n"); break;
9932 case XML_PARSER_START:
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: try START\n"); break;
9935 case XML_PARSER_MISC:
9936 xmlGenericError(xmlGenericErrorContext,
9937 "PP: try MISC\n");break;
9938 case XML_PARSER_COMMENT:
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: try COMMENT\n");break;
9941 case XML_PARSER_PROLOG:
9942 xmlGenericError(xmlGenericErrorContext,
9943 "PP: try PROLOG\n");break;
9944 case XML_PARSER_START_TAG:
9945 xmlGenericError(xmlGenericErrorContext,
9946 "PP: try START_TAG\n");break;
9947 case XML_PARSER_CONTENT:
9948 xmlGenericError(xmlGenericErrorContext,
9949 "PP: try CONTENT\n");break;
9950 case XML_PARSER_CDATA_SECTION:
9951 xmlGenericError(xmlGenericErrorContext,
9952 "PP: try CDATA_SECTION\n");break;
9953 case XML_PARSER_END_TAG:
9954 xmlGenericError(xmlGenericErrorContext,
9955 "PP: try END_TAG\n");break;
9956 case XML_PARSER_ENTITY_DECL:
9957 xmlGenericError(xmlGenericErrorContext,
9958 "PP: try ENTITY_DECL\n");break;
9959 case XML_PARSER_ENTITY_VALUE:
9960 xmlGenericError(xmlGenericErrorContext,
9961 "PP: try ENTITY_VALUE\n");break;
9962 case XML_PARSER_ATTRIBUTE_VALUE:
9963 xmlGenericError(xmlGenericErrorContext,
9964 "PP: try ATTRIBUTE_VALUE\n");break;
9965 case XML_PARSER_DTD:
9966 xmlGenericError(xmlGenericErrorContext,
9967 "PP: try DTD\n");break;
9968 case XML_PARSER_EPILOG:
9969 xmlGenericError(xmlGenericErrorContext,
9970 "PP: try EPILOG\n");break;
9971 case XML_PARSER_PI:
9972 xmlGenericError(xmlGenericErrorContext,
9973 "PP: try PI\n");break;
9974 case XML_PARSER_IGNORE:
9975 xmlGenericError(xmlGenericErrorContext,
9976 "PP: try IGNORE\n");break;
9977 }
9978#endif
9979
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009980 if ((ctxt->input != NULL) &&
9981 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009982 xmlSHRINK(ctxt);
9983 ctxt->checkIndex = 0;
9984 }
9985 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009986
Daniel Veillarda880b122003-04-21 21:36:41 +00009987 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009988 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009989 return(0);
9990
9991
Owen Taylor3473f882001-02-23 17:55:21 +00009992 /*
9993 * Pop-up of finished entities.
9994 */
9995 while ((RAW == 0) && (ctxt->inputNr > 1))
9996 xmlPopInput(ctxt);
9997
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009998 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009999 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010000 avail = ctxt->input->length -
10001 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010002 else {
10003 /*
10004 * If we are operating on converted input, try to flush
10005 * remainng chars to avoid them stalling in the non-converted
10006 * buffer.
10007 */
10008 if ((ctxt->input->buf->raw != NULL) &&
10009 (ctxt->input->buf->raw->use > 0)) {
10010 int base = ctxt->input->base -
10011 ctxt->input->buf->buffer->content;
10012 int current = ctxt->input->cur - ctxt->input->base;
10013
10014 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10015 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10016 ctxt->input->cur = ctxt->input->base + current;
10017 ctxt->input->end =
10018 &ctxt->input->buf->buffer->content[
10019 ctxt->input->buf->buffer->use];
10020 }
10021 avail = ctxt->input->buf->buffer->use -
10022 (ctxt->input->cur - ctxt->input->base);
10023 }
Owen Taylor3473f882001-02-23 17:55:21 +000010024 if (avail < 1)
10025 goto done;
10026 switch (ctxt->instate) {
10027 case XML_PARSER_EOF:
10028 /*
10029 * Document parsing is done !
10030 */
10031 goto done;
10032 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010033 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10034 xmlChar start[4];
10035 xmlCharEncoding enc;
10036
10037 /*
10038 * Very first chars read from the document flow.
10039 */
10040 if (avail < 4)
10041 goto done;
10042
10043 /*
10044 * Get the 4 first bytes and decode the charset
10045 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010046 * plug some encoding conversion routines,
10047 * else xmlSwitchEncoding will set to (default)
10048 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010049 */
10050 start[0] = RAW;
10051 start[1] = NXT(1);
10052 start[2] = NXT(2);
10053 start[3] = NXT(3);
10054 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010055 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010056 break;
10057 }
Owen Taylor3473f882001-02-23 17:55:21 +000010058
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010059 if (avail < 2)
10060 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010061 cur = ctxt->input->cur[0];
10062 next = ctxt->input->cur[1];
10063 if (cur == 0) {
10064 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10065 ctxt->sax->setDocumentLocator(ctxt->userData,
10066 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010067 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010068 ctxt->instate = XML_PARSER_EOF;
10069#ifdef DEBUG_PUSH
10070 xmlGenericError(xmlGenericErrorContext,
10071 "PP: entering EOF\n");
10072#endif
10073 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10074 ctxt->sax->endDocument(ctxt->userData);
10075 goto done;
10076 }
10077 if ((cur == '<') && (next == '?')) {
10078 /* PI or XML decl */
10079 if (avail < 5) return(ret);
10080 if ((!terminate) &&
10081 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10082 return(ret);
10083 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10084 ctxt->sax->setDocumentLocator(ctxt->userData,
10085 &xmlDefaultSAXLocator);
10086 if ((ctxt->input->cur[2] == 'x') &&
10087 (ctxt->input->cur[3] == 'm') &&
10088 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010089 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010090 ret += 5;
10091#ifdef DEBUG_PUSH
10092 xmlGenericError(xmlGenericErrorContext,
10093 "PP: Parsing XML Decl\n");
10094#endif
10095 xmlParseXMLDecl(ctxt);
10096 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10097 /*
10098 * The XML REC instructs us to stop parsing right
10099 * here
10100 */
10101 ctxt->instate = XML_PARSER_EOF;
10102 return(0);
10103 }
10104 ctxt->standalone = ctxt->input->standalone;
10105 if ((ctxt->encoding == NULL) &&
10106 (ctxt->input->encoding != NULL))
10107 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10108 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10109 (!ctxt->disableSAX))
10110 ctxt->sax->startDocument(ctxt->userData);
10111 ctxt->instate = XML_PARSER_MISC;
10112#ifdef DEBUG_PUSH
10113 xmlGenericError(xmlGenericErrorContext,
10114 "PP: entering MISC\n");
10115#endif
10116 } else {
10117 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10118 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10119 (!ctxt->disableSAX))
10120 ctxt->sax->startDocument(ctxt->userData);
10121 ctxt->instate = XML_PARSER_MISC;
10122#ifdef DEBUG_PUSH
10123 xmlGenericError(xmlGenericErrorContext,
10124 "PP: entering MISC\n");
10125#endif
10126 }
10127 } else {
10128 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10129 ctxt->sax->setDocumentLocator(ctxt->userData,
10130 &xmlDefaultSAXLocator);
10131 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010132 if (ctxt->version == NULL) {
10133 xmlErrMemory(ctxt, NULL);
10134 break;
10135 }
Owen Taylor3473f882001-02-23 17:55:21 +000010136 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10137 (!ctxt->disableSAX))
10138 ctxt->sax->startDocument(ctxt->userData);
10139 ctxt->instate = XML_PARSER_MISC;
10140#ifdef DEBUG_PUSH
10141 xmlGenericError(xmlGenericErrorContext,
10142 "PP: entering MISC\n");
10143#endif
10144 }
10145 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010146 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010147 const xmlChar *name;
10148 const xmlChar *prefix;
10149 const xmlChar *URI;
10150 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010151
10152 if ((avail < 2) && (ctxt->inputNr == 1))
10153 goto done;
10154 cur = ctxt->input->cur[0];
10155 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010156 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010157 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010158 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10159 ctxt->sax->endDocument(ctxt->userData);
10160 goto done;
10161 }
10162 if (!terminate) {
10163 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010164 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010165 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010166 goto done;
10167 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10168 goto done;
10169 }
10170 }
10171 if (ctxt->spaceNr == 0)
10172 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010173 else if (*ctxt->space == -2)
10174 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010175 else
10176 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010177#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010178 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010179#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010180 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010181#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010182 else
10183 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010184#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010185 if (name == NULL) {
10186 spacePop(ctxt);
10187 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010188 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10189 ctxt->sax->endDocument(ctxt->userData);
10190 goto done;
10191 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010192#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010193 /*
10194 * [ VC: Root Element Type ]
10195 * The Name in the document type declaration must match
10196 * the element type of the root element.
10197 */
10198 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10199 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10200 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010201#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010202
10203 /*
10204 * Check for an Empty Element.
10205 */
10206 if ((RAW == '/') && (NXT(1) == '>')) {
10207 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010208
10209 if (ctxt->sax2) {
10210 if ((ctxt->sax != NULL) &&
10211 (ctxt->sax->endElementNs != NULL) &&
10212 (!ctxt->disableSAX))
10213 ctxt->sax->endElementNs(ctxt->userData, name,
10214 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010215 if (ctxt->nsNr - nsNr > 0)
10216 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010217#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010218 } else {
10219 if ((ctxt->sax != NULL) &&
10220 (ctxt->sax->endElement != NULL) &&
10221 (!ctxt->disableSAX))
10222 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010223#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010224 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010225 spacePop(ctxt);
10226 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010227 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010228 } else {
10229 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010230 }
10231 break;
10232 }
10233 if (RAW == '>') {
10234 NEXT;
10235 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010236 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010237 "Couldn't find end of Start Tag %s\n",
10238 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010239 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010240 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010241 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010242 if (ctxt->sax2)
10243 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010244#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010245 else
10246 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010247#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010248
Daniel Veillarda880b122003-04-21 21:36:41 +000010249 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010250 break;
10251 }
10252 case XML_PARSER_CONTENT: {
10253 const xmlChar *test;
10254 unsigned int cons;
10255 if ((avail < 2) && (ctxt->inputNr == 1))
10256 goto done;
10257 cur = ctxt->input->cur[0];
10258 next = ctxt->input->cur[1];
10259
10260 test = CUR_PTR;
10261 cons = ctxt->input->consumed;
10262 if ((cur == '<') && (next == '/')) {
10263 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010264 break;
10265 } else if ((cur == '<') && (next == '?')) {
10266 if ((!terminate) &&
10267 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10268 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010269 xmlParsePI(ctxt);
10270 } else if ((cur == '<') && (next != '!')) {
10271 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010272 break;
10273 } else if ((cur == '<') && (next == '!') &&
10274 (ctxt->input->cur[2] == '-') &&
10275 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010276 int term;
10277
10278 if (avail < 4)
10279 goto done;
10280 ctxt->input->cur += 4;
10281 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10282 ctxt->input->cur -= 4;
10283 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010284 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010285 xmlParseComment(ctxt);
10286 ctxt->instate = XML_PARSER_CONTENT;
10287 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10288 (ctxt->input->cur[2] == '[') &&
10289 (ctxt->input->cur[3] == 'C') &&
10290 (ctxt->input->cur[4] == 'D') &&
10291 (ctxt->input->cur[5] == 'A') &&
10292 (ctxt->input->cur[6] == 'T') &&
10293 (ctxt->input->cur[7] == 'A') &&
10294 (ctxt->input->cur[8] == '[')) {
10295 SKIP(9);
10296 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010297 break;
10298 } else if ((cur == '<') && (next == '!') &&
10299 (avail < 9)) {
10300 goto done;
10301 } else if (cur == '&') {
10302 if ((!terminate) &&
10303 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10304 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010305 xmlParseReference(ctxt);
10306 } else {
10307 /* TODO Avoid the extra copy, handle directly !!! */
10308 /*
10309 * Goal of the following test is:
10310 * - minimize calls to the SAX 'character' callback
10311 * when they are mergeable
10312 * - handle an problem for isBlank when we only parse
10313 * a sequence of blank chars and the next one is
10314 * not available to check against '<' presence.
10315 * - tries to homogenize the differences in SAX
10316 * callbacks between the push and pull versions
10317 * of the parser.
10318 */
10319 if ((ctxt->inputNr == 1) &&
10320 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10321 if (!terminate) {
10322 if (ctxt->progressive) {
10323 if ((lastlt == NULL) ||
10324 (ctxt->input->cur > lastlt))
10325 goto done;
10326 } else if (xmlParseLookupSequence(ctxt,
10327 '<', 0, 0) < 0) {
10328 goto done;
10329 }
10330 }
10331 }
10332 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010333 xmlParseCharData(ctxt, 0);
10334 }
10335 /*
10336 * Pop-up of finished entities.
10337 */
10338 while ((RAW == 0) && (ctxt->inputNr > 1))
10339 xmlPopInput(ctxt);
10340 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010341 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10342 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010343 ctxt->instate = XML_PARSER_EOF;
10344 break;
10345 }
10346 break;
10347 }
10348 case XML_PARSER_END_TAG:
10349 if (avail < 2)
10350 goto done;
10351 if (!terminate) {
10352 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010353 /* > can be found unescaped in attribute values */
10354 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010355 goto done;
10356 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10357 goto done;
10358 }
10359 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010360 if (ctxt->sax2) {
10361 xmlParseEndTag2(ctxt,
10362 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10363 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010364 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010365 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010366 }
10367#ifdef LIBXML_SAX1_ENABLED
10368 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010369 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010370#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010371 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010372 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010373 } else {
10374 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010375 }
10376 break;
10377 case XML_PARSER_CDATA_SECTION: {
10378 /*
10379 * The Push mode need to have the SAX callback for
10380 * cdataBlock merge back contiguous callbacks.
10381 */
10382 int base;
10383
10384 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10385 if (base < 0) {
10386 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010387 int tmp;
10388
10389 tmp = xmlCheckCdataPush(ctxt->input->cur,
10390 XML_PARSER_BIG_BUFFER_SIZE);
10391 if (tmp < 0) {
10392 tmp = -tmp;
10393 ctxt->input->cur += tmp;
10394 goto encoding_error;
10395 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10397 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010398 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010399 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010400 else if (ctxt->sax->characters != NULL)
10401 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010402 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010403 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010404 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010405 ctxt->checkIndex = 0;
10406 }
10407 goto done;
10408 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010409 int tmp;
10410
10411 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10412 if ((tmp < 0) || (tmp != base)) {
10413 tmp = -tmp;
10414 ctxt->input->cur += tmp;
10415 goto encoding_error;
10416 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010417 if ((ctxt->sax != NULL) && (base == 0) &&
10418 (ctxt->sax->cdataBlock != NULL) &&
10419 (!ctxt->disableSAX)) {
10420 /*
10421 * Special case to provide identical behaviour
10422 * between pull and push parsers on enpty CDATA
10423 * sections
10424 */
10425 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10426 (!strncmp((const char *)&ctxt->input->cur[-9],
10427 "<![CDATA[", 9)))
10428 ctxt->sax->cdataBlock(ctxt->userData,
10429 BAD_CAST "", 0);
10430 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010431 (!ctxt->disableSAX)) {
10432 if (ctxt->sax->cdataBlock != NULL)
10433 ctxt->sax->cdataBlock(ctxt->userData,
10434 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010435 else if (ctxt->sax->characters != NULL)
10436 ctxt->sax->characters(ctxt->userData,
10437 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010438 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010439 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010440 ctxt->checkIndex = 0;
10441 ctxt->instate = XML_PARSER_CONTENT;
10442#ifdef DEBUG_PUSH
10443 xmlGenericError(xmlGenericErrorContext,
10444 "PP: entering CONTENT\n");
10445#endif
10446 }
10447 break;
10448 }
Owen Taylor3473f882001-02-23 17:55:21 +000010449 case XML_PARSER_MISC:
10450 SKIP_BLANKS;
10451 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010452 avail = ctxt->input->length -
10453 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010454 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010455 avail = ctxt->input->buf->buffer->use -
10456 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010457 if (avail < 2)
10458 goto done;
10459 cur = ctxt->input->cur[0];
10460 next = ctxt->input->cur[1];
10461 if ((cur == '<') && (next == '?')) {
10462 if ((!terminate) &&
10463 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10464 goto done;
10465#ifdef DEBUG_PUSH
10466 xmlGenericError(xmlGenericErrorContext,
10467 "PP: Parsing PI\n");
10468#endif
10469 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010470 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010471 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010472 (ctxt->input->cur[2] == '-') &&
10473 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010474 if ((!terminate) &&
10475 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10476 goto done;
10477#ifdef DEBUG_PUSH
10478 xmlGenericError(xmlGenericErrorContext,
10479 "PP: Parsing Comment\n");
10480#endif
10481 xmlParseComment(ctxt);
10482 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010483 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010484 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010485 (ctxt->input->cur[2] == 'D') &&
10486 (ctxt->input->cur[3] == 'O') &&
10487 (ctxt->input->cur[4] == 'C') &&
10488 (ctxt->input->cur[5] == 'T') &&
10489 (ctxt->input->cur[6] == 'Y') &&
10490 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010491 (ctxt->input->cur[8] == 'E')) {
10492 if ((!terminate) &&
10493 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10494 goto done;
10495#ifdef DEBUG_PUSH
10496 xmlGenericError(xmlGenericErrorContext,
10497 "PP: Parsing internal subset\n");
10498#endif
10499 ctxt->inSubset = 1;
10500 xmlParseDocTypeDecl(ctxt);
10501 if (RAW == '[') {
10502 ctxt->instate = XML_PARSER_DTD;
10503#ifdef DEBUG_PUSH
10504 xmlGenericError(xmlGenericErrorContext,
10505 "PP: entering DTD\n");
10506#endif
10507 } else {
10508 /*
10509 * Create and update the external subset.
10510 */
10511 ctxt->inSubset = 2;
10512 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10513 (ctxt->sax->externalSubset != NULL))
10514 ctxt->sax->externalSubset(ctxt->userData,
10515 ctxt->intSubName, ctxt->extSubSystem,
10516 ctxt->extSubURI);
10517 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010518 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010519 ctxt->instate = XML_PARSER_PROLOG;
10520#ifdef DEBUG_PUSH
10521 xmlGenericError(xmlGenericErrorContext,
10522 "PP: entering PROLOG\n");
10523#endif
10524 }
10525 } else if ((cur == '<') && (next == '!') &&
10526 (avail < 9)) {
10527 goto done;
10528 } else {
10529 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010530 ctxt->progressive = 1;
10531 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010532#ifdef DEBUG_PUSH
10533 xmlGenericError(xmlGenericErrorContext,
10534 "PP: entering START_TAG\n");
10535#endif
10536 }
10537 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010538 case XML_PARSER_PROLOG:
10539 SKIP_BLANKS;
10540 if (ctxt->input->buf == NULL)
10541 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10542 else
10543 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10544 if (avail < 2)
10545 goto done;
10546 cur = ctxt->input->cur[0];
10547 next = ctxt->input->cur[1];
10548 if ((cur == '<') && (next == '?')) {
10549 if ((!terminate) &&
10550 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10551 goto done;
10552#ifdef DEBUG_PUSH
10553 xmlGenericError(xmlGenericErrorContext,
10554 "PP: Parsing PI\n");
10555#endif
10556 xmlParsePI(ctxt);
10557 } else if ((cur == '<') && (next == '!') &&
10558 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10559 if ((!terminate) &&
10560 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10561 goto done;
10562#ifdef DEBUG_PUSH
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: Parsing Comment\n");
10565#endif
10566 xmlParseComment(ctxt);
10567 ctxt->instate = XML_PARSER_PROLOG;
10568 } else if ((cur == '<') && (next == '!') &&
10569 (avail < 4)) {
10570 goto done;
10571 } else {
10572 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010573 if (ctxt->progressive == 0)
10574 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010575 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010576#ifdef DEBUG_PUSH
10577 xmlGenericError(xmlGenericErrorContext,
10578 "PP: entering START_TAG\n");
10579#endif
10580 }
10581 break;
10582 case XML_PARSER_EPILOG:
10583 SKIP_BLANKS;
10584 if (ctxt->input->buf == NULL)
10585 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10586 else
10587 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10588 if (avail < 2)
10589 goto done;
10590 cur = ctxt->input->cur[0];
10591 next = ctxt->input->cur[1];
10592 if ((cur == '<') && (next == '?')) {
10593 if ((!terminate) &&
10594 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10595 goto done;
10596#ifdef DEBUG_PUSH
10597 xmlGenericError(xmlGenericErrorContext,
10598 "PP: Parsing PI\n");
10599#endif
10600 xmlParsePI(ctxt);
10601 ctxt->instate = XML_PARSER_EPILOG;
10602 } else if ((cur == '<') && (next == '!') &&
10603 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10604 if ((!terminate) &&
10605 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10606 goto done;
10607#ifdef DEBUG_PUSH
10608 xmlGenericError(xmlGenericErrorContext,
10609 "PP: Parsing Comment\n");
10610#endif
10611 xmlParseComment(ctxt);
10612 ctxt->instate = XML_PARSER_EPILOG;
10613 } else if ((cur == '<') && (next == '!') &&
10614 (avail < 4)) {
10615 goto done;
10616 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010617 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010618 ctxt->instate = XML_PARSER_EOF;
10619#ifdef DEBUG_PUSH
10620 xmlGenericError(xmlGenericErrorContext,
10621 "PP: entering EOF\n");
10622#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010623 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010624 ctxt->sax->endDocument(ctxt->userData);
10625 goto done;
10626 }
10627 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010628 case XML_PARSER_DTD: {
10629 /*
10630 * Sorry but progressive parsing of the internal subset
10631 * is not expected to be supported. We first check that
10632 * the full content of the internal subset is available and
10633 * the parsing is launched only at that point.
10634 * Internal subset ends up with "']' S? '>'" in an unescaped
10635 * section and not in a ']]>' sequence which are conditional
10636 * sections (whoever argued to keep that crap in XML deserve
10637 * a place in hell !).
10638 */
10639 int base, i;
10640 xmlChar *buf;
10641 xmlChar quote = 0;
10642
10643 base = ctxt->input->cur - ctxt->input->base;
10644 if (base < 0) return(0);
10645 if (ctxt->checkIndex > base)
10646 base = ctxt->checkIndex;
10647 buf = ctxt->input->buf->buffer->content;
10648 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10649 base++) {
10650 if (quote != 0) {
10651 if (buf[base] == quote)
10652 quote = 0;
10653 continue;
10654 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010655 if ((quote == 0) && (buf[base] == '<')) {
10656 int found = 0;
10657 /* special handling of comments */
10658 if (((unsigned int) base + 4 <
10659 ctxt->input->buf->buffer->use) &&
10660 (buf[base + 1] == '!') &&
10661 (buf[base + 2] == '-') &&
10662 (buf[base + 3] == '-')) {
10663 for (;(unsigned int) base + 3 <
10664 ctxt->input->buf->buffer->use; base++) {
10665 if ((buf[base] == '-') &&
10666 (buf[base + 1] == '-') &&
10667 (buf[base + 2] == '>')) {
10668 found = 1;
10669 base += 2;
10670 break;
10671 }
10672 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010673 if (!found) {
10674#if 0
10675 fprintf(stderr, "unfinished comment\n");
10676#endif
10677 break; /* for */
10678 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010679 continue;
10680 }
10681 }
Owen Taylor3473f882001-02-23 17:55:21 +000010682 if (buf[base] == '"') {
10683 quote = '"';
10684 continue;
10685 }
10686 if (buf[base] == '\'') {
10687 quote = '\'';
10688 continue;
10689 }
10690 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010691#if 0
10692 fprintf(stderr, "%c%c%c%c: ", buf[base],
10693 buf[base + 1], buf[base + 2], buf[base + 3]);
10694#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010695 if ((unsigned int) base +1 >=
10696 ctxt->input->buf->buffer->use)
10697 break;
10698 if (buf[base + 1] == ']') {
10699 /* conditional crap, skip both ']' ! */
10700 base++;
10701 continue;
10702 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010703 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010704 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10705 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010706 if (buf[base + i] == '>') {
10707#if 0
10708 fprintf(stderr, "found\n");
10709#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010710 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010711 }
10712 if (!IS_BLANK_CH(buf[base + i])) {
10713#if 0
10714 fprintf(stderr, "not found\n");
10715#endif
10716 goto not_end_of_int_subset;
10717 }
Owen Taylor3473f882001-02-23 17:55:21 +000010718 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010719#if 0
10720 fprintf(stderr, "end of stream\n");
10721#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010722 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010723
Owen Taylor3473f882001-02-23 17:55:21 +000010724 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010725not_end_of_int_subset:
10726 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010727 }
10728 /*
10729 * We didn't found the end of the Internal subset
10730 */
Owen Taylor3473f882001-02-23 17:55:21 +000010731#ifdef DEBUG_PUSH
10732 if (next == 0)
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: lookup of int subset end filed\n");
10735#endif
10736 goto done;
10737
10738found_end_int_subset:
10739 xmlParseInternalSubset(ctxt);
10740 ctxt->inSubset = 2;
10741 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10742 (ctxt->sax->externalSubset != NULL))
10743 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10744 ctxt->extSubSystem, ctxt->extSubURI);
10745 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010746 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010747 ctxt->instate = XML_PARSER_PROLOG;
10748 ctxt->checkIndex = 0;
10749#ifdef DEBUG_PUSH
10750 xmlGenericError(xmlGenericErrorContext,
10751 "PP: entering PROLOG\n");
10752#endif
10753 break;
10754 }
10755 case XML_PARSER_COMMENT:
10756 xmlGenericError(xmlGenericErrorContext,
10757 "PP: internal error, state == COMMENT\n");
10758 ctxt->instate = XML_PARSER_CONTENT;
10759#ifdef DEBUG_PUSH
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: entering CONTENT\n");
10762#endif
10763 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010764 case XML_PARSER_IGNORE:
10765 xmlGenericError(xmlGenericErrorContext,
10766 "PP: internal error, state == IGNORE");
10767 ctxt->instate = XML_PARSER_DTD;
10768#ifdef DEBUG_PUSH
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: entering DTD\n");
10771#endif
10772 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010773 case XML_PARSER_PI:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: internal error, state == PI\n");
10776 ctxt->instate = XML_PARSER_CONTENT;
10777#ifdef DEBUG_PUSH
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: entering CONTENT\n");
10780#endif
10781 break;
10782 case XML_PARSER_ENTITY_DECL:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: internal error, state == ENTITY_DECL\n");
10785 ctxt->instate = XML_PARSER_DTD;
10786#ifdef DEBUG_PUSH
10787 xmlGenericError(xmlGenericErrorContext,
10788 "PP: entering DTD\n");
10789#endif
10790 break;
10791 case XML_PARSER_ENTITY_VALUE:
10792 xmlGenericError(xmlGenericErrorContext,
10793 "PP: internal error, state == ENTITY_VALUE\n");
10794 ctxt->instate = XML_PARSER_CONTENT;
10795#ifdef DEBUG_PUSH
10796 xmlGenericError(xmlGenericErrorContext,
10797 "PP: entering DTD\n");
10798#endif
10799 break;
10800 case XML_PARSER_ATTRIBUTE_VALUE:
10801 xmlGenericError(xmlGenericErrorContext,
10802 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10803 ctxt->instate = XML_PARSER_START_TAG;
10804#ifdef DEBUG_PUSH
10805 xmlGenericError(xmlGenericErrorContext,
10806 "PP: entering START_TAG\n");
10807#endif
10808 break;
10809 case XML_PARSER_SYSTEM_LITERAL:
10810 xmlGenericError(xmlGenericErrorContext,
10811 "PP: internal error, state == SYSTEM_LITERAL\n");
10812 ctxt->instate = XML_PARSER_START_TAG;
10813#ifdef DEBUG_PUSH
10814 xmlGenericError(xmlGenericErrorContext,
10815 "PP: entering START_TAG\n");
10816#endif
10817 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010818 case XML_PARSER_PUBLIC_LITERAL:
10819 xmlGenericError(xmlGenericErrorContext,
10820 "PP: internal error, state == PUBLIC_LITERAL\n");
10821 ctxt->instate = XML_PARSER_START_TAG;
10822#ifdef DEBUG_PUSH
10823 xmlGenericError(xmlGenericErrorContext,
10824 "PP: entering START_TAG\n");
10825#endif
10826 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010827 }
10828 }
10829done:
10830#ifdef DEBUG_PUSH
10831 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10832#endif
10833 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010834encoding_error:
10835 {
10836 char buffer[150];
10837
10838 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10839 ctxt->input->cur[0], ctxt->input->cur[1],
10840 ctxt->input->cur[2], ctxt->input->cur[3]);
10841 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10842 "Input is not proper UTF-8, indicate encoding !\n%s",
10843 BAD_CAST buffer, NULL);
10844 }
10845 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010846}
10847
10848/**
Owen Taylor3473f882001-02-23 17:55:21 +000010849 * xmlParseChunk:
10850 * @ctxt: an XML parser context
10851 * @chunk: an char array
10852 * @size: the size in byte of the chunk
10853 * @terminate: last chunk indicator
10854 *
10855 * Parse a Chunk of memory
10856 *
10857 * Returns zero if no error, the xmlParserErrors otherwise.
10858 */
10859int
10860xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10861 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010862 int end_in_lf = 0;
10863
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010864 if (ctxt == NULL)
10865 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010866 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010867 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010868 if (ctxt->instate == XML_PARSER_START)
10869 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010870 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10871 (chunk[size - 1] == '\r')) {
10872 end_in_lf = 1;
10873 size--;
10874 }
Owen Taylor3473f882001-02-23 17:55:21 +000010875 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10876 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10877 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10878 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010879 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010880
William M. Bracka3215c72004-07-31 16:24:01 +000010881 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10882 if (res < 0) {
10883 ctxt->errNo = XML_PARSER_EOF;
10884 ctxt->disableSAX = 1;
10885 return (XML_PARSER_EOF);
10886 }
Owen Taylor3473f882001-02-23 17:55:21 +000010887 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10888 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010889 ctxt->input->end =
10890 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010891#ifdef DEBUG_PUSH
10892 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10893#endif
10894
Owen Taylor3473f882001-02-23 17:55:21 +000010895 } else if (ctxt->instate != XML_PARSER_EOF) {
10896 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10897 xmlParserInputBufferPtr in = ctxt->input->buf;
10898 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10899 (in->raw != NULL)) {
10900 int nbchars;
10901
10902 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10903 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010904 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010905 xmlGenericError(xmlGenericErrorContext,
10906 "xmlParseChunk: encoder error\n");
10907 return(XML_ERR_INVALID_ENCODING);
10908 }
10909 }
10910 }
10911 }
10912 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010913 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10914 (ctxt->input->buf != NULL)) {
10915 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10916 }
Daniel Veillard14412512005-01-21 23:53:26 +000010917 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010918 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010919 if (terminate) {
10920 /*
10921 * Check for termination
10922 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010923 int avail = 0;
10924
10925 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010926 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010927 avail = ctxt->input->length -
10928 (ctxt->input->cur - ctxt->input->base);
10929 else
10930 avail = ctxt->input->buf->buffer->use -
10931 (ctxt->input->cur - ctxt->input->base);
10932 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010933
Owen Taylor3473f882001-02-23 17:55:21 +000010934 if ((ctxt->instate != XML_PARSER_EOF) &&
10935 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010936 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010937 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010938 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010939 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010940 }
Owen Taylor3473f882001-02-23 17:55:21 +000010941 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010942 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010943 ctxt->sax->endDocument(ctxt->userData);
10944 }
10945 ctxt->instate = XML_PARSER_EOF;
10946 }
10947 return((xmlParserErrors) ctxt->errNo);
10948}
10949
10950/************************************************************************
10951 * *
10952 * I/O front end functions to the parser *
10953 * *
10954 ************************************************************************/
10955
10956/**
Owen Taylor3473f882001-02-23 17:55:21 +000010957 * xmlCreatePushParserCtxt:
10958 * @sax: a SAX handler
10959 * @user_data: The user data returned on SAX callbacks
10960 * @chunk: a pointer to an array of chars
10961 * @size: number of chars in the array
10962 * @filename: an optional file name or URI
10963 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010964 * Create a parser context for using the XML parser in push mode.
10965 * If @buffer and @size are non-NULL, the data is used to detect
10966 * the encoding. The remaining characters will be parsed so they
10967 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010968 * To allow content encoding detection, @size should be >= 4
10969 * The value of @filename is used for fetching external entities
10970 * and error/warning reports.
10971 *
10972 * Returns the new parser context or NULL
10973 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010974
Owen Taylor3473f882001-02-23 17:55:21 +000010975xmlParserCtxtPtr
10976xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10977 const char *chunk, int size, const char *filename) {
10978 xmlParserCtxtPtr ctxt;
10979 xmlParserInputPtr inputStream;
10980 xmlParserInputBufferPtr buf;
10981 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10982
10983 /*
10984 * plug some encoding conversion routines
10985 */
10986 if ((chunk != NULL) && (size >= 4))
10987 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10988
10989 buf = xmlAllocParserInputBuffer(enc);
10990 if (buf == NULL) return(NULL);
10991
10992 ctxt = xmlNewParserCtxt();
10993 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010994 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010995 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010996 return(NULL);
10997 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010998 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010999 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11000 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011001 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011002 xmlFreeParserInputBuffer(buf);
11003 xmlFreeParserCtxt(ctxt);
11004 return(NULL);
11005 }
Owen Taylor3473f882001-02-23 17:55:21 +000011006 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011007#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011008 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011009#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011010 xmlFree(ctxt->sax);
11011 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11012 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011013 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011014 xmlFreeParserInputBuffer(buf);
11015 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011016 return(NULL);
11017 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011018 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11019 if (sax->initialized == XML_SAX2_MAGIC)
11020 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11021 else
11022 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011023 if (user_data != NULL)
11024 ctxt->userData = user_data;
11025 }
11026 if (filename == NULL) {
11027 ctxt->directory = NULL;
11028 } else {
11029 ctxt->directory = xmlParserGetDirectory(filename);
11030 }
11031
11032 inputStream = xmlNewInputStream(ctxt);
11033 if (inputStream == NULL) {
11034 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011035 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011036 return(NULL);
11037 }
11038
11039 if (filename == NULL)
11040 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011041 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011042 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011043 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011044 if (inputStream->filename == NULL) {
11045 xmlFreeParserCtxt(ctxt);
11046 xmlFreeParserInputBuffer(buf);
11047 return(NULL);
11048 }
11049 }
Owen Taylor3473f882001-02-23 17:55:21 +000011050 inputStream->buf = buf;
11051 inputStream->base = inputStream->buf->buffer->content;
11052 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011053 inputStream->end =
11054 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011055
11056 inputPush(ctxt, inputStream);
11057
William M. Brack3a1cd212005-02-11 14:35:54 +000011058 /*
11059 * If the caller didn't provide an initial 'chunk' for determining
11060 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11061 * that it can be automatically determined later
11062 */
11063 if ((size == 0) || (chunk == NULL)) {
11064 ctxt->charset = XML_CHAR_ENCODING_NONE;
11065 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011066 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11067 int cur = ctxt->input->cur - ctxt->input->base;
11068
Owen Taylor3473f882001-02-23 17:55:21 +000011069 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011070
11071 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11072 ctxt->input->cur = ctxt->input->base + cur;
11073 ctxt->input->end =
11074 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011075#ifdef DEBUG_PUSH
11076 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11077#endif
11078 }
11079
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011080 if (enc != XML_CHAR_ENCODING_NONE) {
11081 xmlSwitchEncoding(ctxt, enc);
11082 }
11083
Owen Taylor3473f882001-02-23 17:55:21 +000011084 return(ctxt);
11085}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011086#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011087
11088/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011089 * xmlStopParser:
11090 * @ctxt: an XML parser context
11091 *
11092 * Blocks further parser processing
11093 */
11094void
11095xmlStopParser(xmlParserCtxtPtr ctxt) {
11096 if (ctxt == NULL)
11097 return;
11098 ctxt->instate = XML_PARSER_EOF;
11099 ctxt->disableSAX = 1;
11100 if (ctxt->input != NULL) {
11101 ctxt->input->cur = BAD_CAST"";
11102 ctxt->input->base = ctxt->input->cur;
11103 }
11104}
11105
11106/**
Owen Taylor3473f882001-02-23 17:55:21 +000011107 * xmlCreateIOParserCtxt:
11108 * @sax: a SAX handler
11109 * @user_data: The user data returned on SAX callbacks
11110 * @ioread: an I/O read function
11111 * @ioclose: an I/O close function
11112 * @ioctx: an I/O handler
11113 * @enc: the charset encoding if known
11114 *
11115 * Create a parser context for using the XML parser with an existing
11116 * I/O stream
11117 *
11118 * Returns the new parser context or NULL
11119 */
11120xmlParserCtxtPtr
11121xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11122 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11123 void *ioctx, xmlCharEncoding enc) {
11124 xmlParserCtxtPtr ctxt;
11125 xmlParserInputPtr inputStream;
11126 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011127
11128 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011129
11130 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11131 if (buf == NULL) return(NULL);
11132
11133 ctxt = xmlNewParserCtxt();
11134 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011135 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011136 return(NULL);
11137 }
11138 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011139#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011140 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011141#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011142 xmlFree(ctxt->sax);
11143 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11144 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011145 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011146 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011147 return(NULL);
11148 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011149 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11150 if (sax->initialized == XML_SAX2_MAGIC)
11151 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11152 else
11153 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011154 if (user_data != NULL)
11155 ctxt->userData = user_data;
11156 }
11157
11158 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11159 if (inputStream == NULL) {
11160 xmlFreeParserCtxt(ctxt);
11161 return(NULL);
11162 }
11163 inputPush(ctxt, inputStream);
11164
11165 return(ctxt);
11166}
11167
Daniel Veillard4432df22003-09-28 18:58:27 +000011168#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011169/************************************************************************
11170 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011171 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011172 * *
11173 ************************************************************************/
11174
11175/**
11176 * xmlIOParseDTD:
11177 * @sax: the SAX handler block or NULL
11178 * @input: an Input Buffer
11179 * @enc: the charset encoding if known
11180 *
11181 * Load and parse a DTD
11182 *
11183 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011184 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011185 */
11186
11187xmlDtdPtr
11188xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11189 xmlCharEncoding enc) {
11190 xmlDtdPtr ret = NULL;
11191 xmlParserCtxtPtr ctxt;
11192 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011193 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011194
11195 if (input == NULL)
11196 return(NULL);
11197
11198 ctxt = xmlNewParserCtxt();
11199 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011200 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011201 return(NULL);
11202 }
11203
11204 /*
11205 * Set-up the SAX context
11206 */
11207 if (sax != NULL) {
11208 if (ctxt->sax != NULL)
11209 xmlFree(ctxt->sax);
11210 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011211 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011212 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011213 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011214
11215 /*
11216 * generate a parser input from the I/O handler
11217 */
11218
Daniel Veillard43caefb2003-12-07 19:32:22 +000011219 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011220 if (pinput == NULL) {
11221 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011222 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011223 xmlFreeParserCtxt(ctxt);
11224 return(NULL);
11225 }
11226
11227 /*
11228 * plug some encoding conversion routines here.
11229 */
11230 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011231 if (enc != XML_CHAR_ENCODING_NONE) {
11232 xmlSwitchEncoding(ctxt, enc);
11233 }
Owen Taylor3473f882001-02-23 17:55:21 +000011234
11235 pinput->filename = NULL;
11236 pinput->line = 1;
11237 pinput->col = 1;
11238 pinput->base = ctxt->input->cur;
11239 pinput->cur = ctxt->input->cur;
11240 pinput->free = NULL;
11241
11242 /*
11243 * let's parse that entity knowing it's an external subset.
11244 */
11245 ctxt->inSubset = 2;
11246 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11247 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11248 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011249
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011250 if ((enc == XML_CHAR_ENCODING_NONE) &&
11251 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011252 /*
11253 * Get the 4 first bytes and decode the charset
11254 * if enc != XML_CHAR_ENCODING_NONE
11255 * plug some encoding conversion routines.
11256 */
11257 start[0] = RAW;
11258 start[1] = NXT(1);
11259 start[2] = NXT(2);
11260 start[3] = NXT(3);
11261 enc = xmlDetectCharEncoding(start, 4);
11262 if (enc != XML_CHAR_ENCODING_NONE) {
11263 xmlSwitchEncoding(ctxt, enc);
11264 }
11265 }
11266
Owen Taylor3473f882001-02-23 17:55:21 +000011267 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11268
11269 if (ctxt->myDoc != NULL) {
11270 if (ctxt->wellFormed) {
11271 ret = ctxt->myDoc->extSubset;
11272 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011273 if (ret != NULL) {
11274 xmlNodePtr tmp;
11275
11276 ret->doc = NULL;
11277 tmp = ret->children;
11278 while (tmp != NULL) {
11279 tmp->doc = NULL;
11280 tmp = tmp->next;
11281 }
11282 }
Owen Taylor3473f882001-02-23 17:55:21 +000011283 } else {
11284 ret = NULL;
11285 }
11286 xmlFreeDoc(ctxt->myDoc);
11287 ctxt->myDoc = NULL;
11288 }
11289 if (sax != NULL) ctxt->sax = NULL;
11290 xmlFreeParserCtxt(ctxt);
11291
11292 return(ret);
11293}
11294
11295/**
11296 * xmlSAXParseDTD:
11297 * @sax: the SAX handler block
11298 * @ExternalID: a NAME* containing the External ID of the DTD
11299 * @SystemID: a NAME* containing the URL to the DTD
11300 *
11301 * Load and parse an external subset.
11302 *
11303 * Returns the resulting xmlDtdPtr or NULL in case of error.
11304 */
11305
11306xmlDtdPtr
11307xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11308 const xmlChar *SystemID) {
11309 xmlDtdPtr ret = NULL;
11310 xmlParserCtxtPtr ctxt;
11311 xmlParserInputPtr input = NULL;
11312 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011313 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011314
11315 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11316
11317 ctxt = xmlNewParserCtxt();
11318 if (ctxt == NULL) {
11319 return(NULL);
11320 }
11321
11322 /*
11323 * Set-up the SAX context
11324 */
11325 if (sax != NULL) {
11326 if (ctxt->sax != NULL)
11327 xmlFree(ctxt->sax);
11328 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011329 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011330 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011331
11332 /*
11333 * Canonicalise the system ID
11334 */
11335 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011336 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011337 xmlFreeParserCtxt(ctxt);
11338 return(NULL);
11339 }
Owen Taylor3473f882001-02-23 17:55:21 +000011340
11341 /*
11342 * Ask the Entity resolver to load the damn thing
11343 */
11344
11345 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011346 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11347 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011348 if (input == NULL) {
11349 if (sax != NULL) ctxt->sax = NULL;
11350 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011351 if (systemIdCanonic != NULL)
11352 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011353 return(NULL);
11354 }
11355
11356 /*
11357 * plug some encoding conversion routines here.
11358 */
11359 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011360 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11361 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11362 xmlSwitchEncoding(ctxt, enc);
11363 }
Owen Taylor3473f882001-02-23 17:55:21 +000011364
11365 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011366 input->filename = (char *) systemIdCanonic;
11367 else
11368 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011369 input->line = 1;
11370 input->col = 1;
11371 input->base = ctxt->input->cur;
11372 input->cur = ctxt->input->cur;
11373 input->free = NULL;
11374
11375 /*
11376 * let's parse that entity knowing it's an external subset.
11377 */
11378 ctxt->inSubset = 2;
11379 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11380 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11381 ExternalID, SystemID);
11382 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11383
11384 if (ctxt->myDoc != NULL) {
11385 if (ctxt->wellFormed) {
11386 ret = ctxt->myDoc->extSubset;
11387 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011388 if (ret != NULL) {
11389 xmlNodePtr tmp;
11390
11391 ret->doc = NULL;
11392 tmp = ret->children;
11393 while (tmp != NULL) {
11394 tmp->doc = NULL;
11395 tmp = tmp->next;
11396 }
11397 }
Owen Taylor3473f882001-02-23 17:55:21 +000011398 } else {
11399 ret = NULL;
11400 }
11401 xmlFreeDoc(ctxt->myDoc);
11402 ctxt->myDoc = NULL;
11403 }
11404 if (sax != NULL) ctxt->sax = NULL;
11405 xmlFreeParserCtxt(ctxt);
11406
11407 return(ret);
11408}
11409
Daniel Veillard4432df22003-09-28 18:58:27 +000011410
Owen Taylor3473f882001-02-23 17:55:21 +000011411/**
11412 * xmlParseDTD:
11413 * @ExternalID: a NAME* containing the External ID of the DTD
11414 * @SystemID: a NAME* containing the URL to the DTD
11415 *
11416 * Load and parse an external subset.
11417 *
11418 * Returns the resulting xmlDtdPtr or NULL in case of error.
11419 */
11420
11421xmlDtdPtr
11422xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11423 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11424}
Daniel Veillard4432df22003-09-28 18:58:27 +000011425#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011426
11427/************************************************************************
11428 * *
11429 * Front ends when parsing an Entity *
11430 * *
11431 ************************************************************************/
11432
11433/**
Owen Taylor3473f882001-02-23 17:55:21 +000011434 * xmlParseCtxtExternalEntity:
11435 * @ctx: the existing parsing context
11436 * @URL: the URL for the entity to load
11437 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011438 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011439 *
11440 * Parse an external general entity within an existing parsing context
11441 * An external general parsed entity is well-formed if it matches the
11442 * production labeled extParsedEnt.
11443 *
11444 * [78] extParsedEnt ::= TextDecl? content
11445 *
11446 * Returns 0 if the entity is well formed, -1 in case of args problem and
11447 * the parser error code otherwise
11448 */
11449
11450int
11451xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011452 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011453 xmlParserCtxtPtr ctxt;
11454 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011455 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011456 xmlSAXHandlerPtr oldsax = NULL;
11457 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011458 xmlChar start[4];
11459 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011460 xmlParserInputPtr inputStream;
11461 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011462
Daniel Veillardce682bc2004-11-05 17:22:25 +000011463 if (ctx == NULL) return(-1);
11464
Owen Taylor3473f882001-02-23 17:55:21 +000011465 if (ctx->depth > 40) {
11466 return(XML_ERR_ENTITY_LOOP);
11467 }
11468
Daniel Veillardcda96922001-08-21 10:56:31 +000011469 if (lst != NULL)
11470 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011471 if ((URL == NULL) && (ID == NULL))
11472 return(-1);
11473 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11474 return(-1);
11475
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011476 ctxt = xmlNewParserCtxt();
11477 if (ctxt == NULL) {
11478 return(-1);
11479 }
11480
Owen Taylor3473f882001-02-23 17:55:21 +000011481 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011482 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011483
11484 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11485 if (inputStream == NULL) {
11486 xmlFreeParserCtxt(ctxt);
11487 return(-1);
11488 }
11489
11490 inputPush(ctxt, inputStream);
11491
11492 if ((ctxt->directory == NULL) && (directory == NULL))
11493 directory = xmlParserGetDirectory((char *)URL);
11494 if ((ctxt->directory == NULL) && (directory != NULL))
11495 ctxt->directory = directory;
11496
Owen Taylor3473f882001-02-23 17:55:21 +000011497 oldsax = ctxt->sax;
11498 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011499 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011500 newDoc = xmlNewDoc(BAD_CAST "1.0");
11501 if (newDoc == NULL) {
11502 xmlFreeParserCtxt(ctxt);
11503 return(-1);
11504 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011505 if (ctx->myDoc->dict) {
11506 newDoc->dict = ctx->myDoc->dict;
11507 xmlDictReference(newDoc->dict);
11508 }
Owen Taylor3473f882001-02-23 17:55:21 +000011509 if (ctx->myDoc != NULL) {
11510 newDoc->intSubset = ctx->myDoc->intSubset;
11511 newDoc->extSubset = ctx->myDoc->extSubset;
11512 }
11513 if (ctx->myDoc->URL != NULL) {
11514 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11515 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011516 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11517 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011518 ctxt->sax = oldsax;
11519 xmlFreeParserCtxt(ctxt);
11520 newDoc->intSubset = NULL;
11521 newDoc->extSubset = NULL;
11522 xmlFreeDoc(newDoc);
11523 return(-1);
11524 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011525 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011526 nodePush(ctxt, newDoc->children);
11527 if (ctx->myDoc == NULL) {
11528 ctxt->myDoc = newDoc;
11529 } else {
11530 ctxt->myDoc = ctx->myDoc;
11531 newDoc->children->doc = ctx->myDoc;
11532 }
11533
Daniel Veillard87a764e2001-06-20 17:41:10 +000011534 /*
11535 * Get the 4 first bytes and decode the charset
11536 * if enc != XML_CHAR_ENCODING_NONE
11537 * plug some encoding conversion routines.
11538 */
11539 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011540 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11541 start[0] = RAW;
11542 start[1] = NXT(1);
11543 start[2] = NXT(2);
11544 start[3] = NXT(3);
11545 enc = xmlDetectCharEncoding(start, 4);
11546 if (enc != XML_CHAR_ENCODING_NONE) {
11547 xmlSwitchEncoding(ctxt, enc);
11548 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011549 }
11550
Owen Taylor3473f882001-02-23 17:55:21 +000011551 /*
11552 * Parse a possible text declaration first
11553 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011554 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011555 xmlParseTextDecl(ctxt);
11556 }
11557
11558 /*
11559 * Doing validity checking on chunk doesn't make sense
11560 */
11561 ctxt->instate = XML_PARSER_CONTENT;
11562 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011563 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011564 ctxt->loadsubset = ctx->loadsubset;
11565 ctxt->depth = ctx->depth + 1;
11566 ctxt->replaceEntities = ctx->replaceEntities;
11567 if (ctxt->validate) {
11568 ctxt->vctxt.error = ctx->vctxt.error;
11569 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011570 } else {
11571 ctxt->vctxt.error = NULL;
11572 ctxt->vctxt.warning = NULL;
11573 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011574 ctxt->vctxt.nodeTab = NULL;
11575 ctxt->vctxt.nodeNr = 0;
11576 ctxt->vctxt.nodeMax = 0;
11577 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011578 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11579 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011580 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11581 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11582 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011583 ctxt->dictNames = ctx->dictNames;
11584 ctxt->attsDefault = ctx->attsDefault;
11585 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011586 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011587
11588 xmlParseContent(ctxt);
11589
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011590 ctx->validate = ctxt->validate;
11591 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011592 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011593 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011594 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011595 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011596 }
11597 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011598 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011599 }
11600
11601 if (!ctxt->wellFormed) {
11602 if (ctxt->errNo == 0)
11603 ret = 1;
11604 else
11605 ret = ctxt->errNo;
11606 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011607 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011608 xmlNodePtr cur;
11609
11610 /*
11611 * Return the newly created nodeset after unlinking it from
11612 * they pseudo parent.
11613 */
11614 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011615 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011616 while (cur != NULL) {
11617 cur->parent = NULL;
11618 cur = cur->next;
11619 }
11620 newDoc->children->children = NULL;
11621 }
11622 ret = 0;
11623 }
11624 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011625 ctxt->dict = NULL;
11626 ctxt->attsDefault = NULL;
11627 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011628 xmlFreeParserCtxt(ctxt);
11629 newDoc->intSubset = NULL;
11630 newDoc->extSubset = NULL;
11631 xmlFreeDoc(newDoc);
11632
11633 return(ret);
11634}
11635
11636/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011637 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011638 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011639 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011640 * @sax: the SAX handler bloc (possibly NULL)
11641 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11642 * @depth: Used for loop detection, use 0
11643 * @URL: the URL for the entity to load
11644 * @ID: the System ID for the entity to load
11645 * @list: the return value for the set of parsed nodes
11646 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011647 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011648 *
11649 * Returns 0 if the entity is well formed, -1 in case of args problem and
11650 * the parser error code otherwise
11651 */
11652
Daniel Veillard7d515752003-09-26 19:12:37 +000011653static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011654xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11655 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011656 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011657 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011658 xmlParserCtxtPtr ctxt;
11659 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011660 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011661 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011662 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011663 xmlChar start[4];
11664 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011665
11666 if (depth > 40) {
11667 return(XML_ERR_ENTITY_LOOP);
11668 }
11669
11670
11671
11672 if (list != NULL)
11673 *list = NULL;
11674 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011675 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011676 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011677 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011678
11679
11680 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011681 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011682 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011683 if (oldctxt != NULL) {
11684 ctxt->_private = oldctxt->_private;
11685 ctxt->loadsubset = oldctxt->loadsubset;
11686 ctxt->validate = oldctxt->validate;
11687 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011688 ctxt->record_info = oldctxt->record_info;
11689 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11690 ctxt->node_seq.length = oldctxt->node_seq.length;
11691 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011692 } else {
11693 /*
11694 * Doing validity checking on chunk without context
11695 * doesn't make sense
11696 */
11697 ctxt->_private = NULL;
11698 ctxt->validate = 0;
11699 ctxt->external = 2;
11700 ctxt->loadsubset = 0;
11701 }
Owen Taylor3473f882001-02-23 17:55:21 +000011702 if (sax != NULL) {
11703 oldsax = ctxt->sax;
11704 ctxt->sax = sax;
11705 if (user_data != NULL)
11706 ctxt->userData = user_data;
11707 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011708 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011709 newDoc = xmlNewDoc(BAD_CAST "1.0");
11710 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011711 ctxt->node_seq.maximum = 0;
11712 ctxt->node_seq.length = 0;
11713 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011714 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011715 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011716 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011717 newDoc->intSubset = doc->intSubset;
11718 newDoc->extSubset = doc->extSubset;
11719 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011720 xmlDictReference(newDoc->dict);
11721
Owen Taylor3473f882001-02-23 17:55:21 +000011722 if (doc->URL != NULL) {
11723 newDoc->URL = xmlStrdup(doc->URL);
11724 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011725 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11726 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011727 if (sax != NULL)
11728 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011729 ctxt->node_seq.maximum = 0;
11730 ctxt->node_seq.length = 0;
11731 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011732 xmlFreeParserCtxt(ctxt);
11733 newDoc->intSubset = NULL;
11734 newDoc->extSubset = NULL;
11735 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011736 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011737 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011738 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011739 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011740 ctxt->myDoc = doc;
11741 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011742
Daniel Veillard87a764e2001-06-20 17:41:10 +000011743 /*
11744 * Get the 4 first bytes and decode the charset
11745 * if enc != XML_CHAR_ENCODING_NONE
11746 * plug some encoding conversion routines.
11747 */
11748 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011749 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11750 start[0] = RAW;
11751 start[1] = NXT(1);
11752 start[2] = NXT(2);
11753 start[3] = NXT(3);
11754 enc = xmlDetectCharEncoding(start, 4);
11755 if (enc != XML_CHAR_ENCODING_NONE) {
11756 xmlSwitchEncoding(ctxt, enc);
11757 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011758 }
11759
Owen Taylor3473f882001-02-23 17:55:21 +000011760 /*
11761 * Parse a possible text declaration first
11762 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011763 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011764 xmlParseTextDecl(ctxt);
11765 }
11766
Owen Taylor3473f882001-02-23 17:55:21 +000011767 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011768 ctxt->depth = depth;
11769
11770 xmlParseContent(ctxt);
11771
Daniel Veillard561b7f82002-03-20 21:55:57 +000011772 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011773 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011774 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011775 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011776 }
11777 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011778 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011779 }
11780
11781 if (!ctxt->wellFormed) {
11782 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011783 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011784 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011785 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011786 } else {
11787 if (list != NULL) {
11788 xmlNodePtr cur;
11789
11790 /*
11791 * Return the newly created nodeset after unlinking it from
11792 * they pseudo parent.
11793 */
11794 cur = newDoc->children->children;
11795 *list = cur;
11796 while (cur != NULL) {
11797 cur->parent = NULL;
11798 cur = cur->next;
11799 }
11800 newDoc->children->children = NULL;
11801 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011802 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011803 }
11804 if (sax != NULL)
11805 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011806 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11807 oldctxt->node_seq.length = ctxt->node_seq.length;
11808 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011809 ctxt->node_seq.maximum = 0;
11810 ctxt->node_seq.length = 0;
11811 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011812 xmlFreeParserCtxt(ctxt);
11813 newDoc->intSubset = NULL;
11814 newDoc->extSubset = NULL;
11815 xmlFreeDoc(newDoc);
11816
11817 return(ret);
11818}
11819
Daniel Veillard81273902003-09-30 00:43:48 +000011820#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011821/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011822 * xmlParseExternalEntity:
11823 * @doc: the document the chunk pertains to
11824 * @sax: the SAX handler bloc (possibly NULL)
11825 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11826 * @depth: Used for loop detection, use 0
11827 * @URL: the URL for the entity to load
11828 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011829 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011830 *
11831 * Parse an external general entity
11832 * An external general parsed entity is well-formed if it matches the
11833 * production labeled extParsedEnt.
11834 *
11835 * [78] extParsedEnt ::= TextDecl? content
11836 *
11837 * Returns 0 if the entity is well formed, -1 in case of args problem and
11838 * the parser error code otherwise
11839 */
11840
11841int
11842xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011843 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011844 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011845 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011846}
11847
11848/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011849 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011850 * @doc: the document the chunk pertains to
11851 * @sax: the SAX handler bloc (possibly NULL)
11852 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11853 * @depth: Used for loop detection, use 0
11854 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011855 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011856 *
11857 * Parse a well-balanced chunk of an XML document
11858 * called by the parser
11859 * The allowed sequence for the Well Balanced Chunk is the one defined by
11860 * the content production in the XML grammar:
11861 *
11862 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11863 *
11864 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11865 * the parser error code otherwise
11866 */
11867
11868int
11869xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011870 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011871 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11872 depth, string, lst, 0 );
11873}
Daniel Veillard81273902003-09-30 00:43:48 +000011874#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011875
11876/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011877 * xmlParseBalancedChunkMemoryInternal:
11878 * @oldctxt: the existing parsing context
11879 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11880 * @user_data: the user data field for the parser context
11881 * @lst: the return value for the set of parsed nodes
11882 *
11883 *
11884 * Parse a well-balanced chunk of an XML document
11885 * called by the parser
11886 * The allowed sequence for the Well Balanced Chunk is the one defined by
11887 * the content production in the XML grammar:
11888 *
11889 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11890 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011891 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11892 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011893 *
11894 * In case recover is set to 1, the nodelist will not be empty even if
11895 * the parsed chunk is not well balanced.
11896 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011897static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011898xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11899 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11900 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011901 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011902 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011903 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011904 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011905 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011906 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011907 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011908
11909 if (oldctxt->depth > 40) {
11910 return(XML_ERR_ENTITY_LOOP);
11911 }
11912
11913
11914 if (lst != NULL)
11915 *lst = NULL;
11916 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011917 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011918
11919 size = xmlStrlen(string);
11920
11921 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011922 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011923 if (user_data != NULL)
11924 ctxt->userData = user_data;
11925 else
11926 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011927 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11928 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011929 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11930 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11931 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011932
11933 oldsax = ctxt->sax;
11934 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011935 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011936 ctxt->replaceEntities = oldctxt->replaceEntities;
11937 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011938
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011939 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011940 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011941 newDoc = xmlNewDoc(BAD_CAST "1.0");
11942 if (newDoc == NULL) {
11943 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011944 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011945 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011946 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011947 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011948 newDoc->dict = ctxt->dict;
11949 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011950 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011951 } else {
11952 ctxt->myDoc = oldctxt->myDoc;
11953 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011954 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011955 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011956 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11957 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011958 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011959 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011960 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011961 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011962 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011963 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011964 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011965 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011966 ctxt->myDoc->children = NULL;
11967 ctxt->myDoc->last = NULL;
11968 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011969 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011970 ctxt->instate = XML_PARSER_CONTENT;
11971 ctxt->depth = oldctxt->depth + 1;
11972
Daniel Veillard328f48c2002-11-15 15:24:34 +000011973 ctxt->validate = 0;
11974 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011975 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11976 /*
11977 * ID/IDREF registration will be done in xmlValidateElement below
11978 */
11979 ctxt->loadsubset |= XML_SKIP_IDS;
11980 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011981 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011982 ctxt->attsDefault = oldctxt->attsDefault;
11983 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011984
Daniel Veillard68e9e742002-11-16 15:35:11 +000011985 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011986 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011987 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011988 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011989 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011990 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011991 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011992 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011993 }
11994
11995 if (!ctxt->wellFormed) {
11996 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011997 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011998 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011999 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012000 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012001 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012002 }
12003
William M. Brack7b9154b2003-09-27 19:23:50 +000012004 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012005 xmlNodePtr cur;
12006
12007 /*
12008 * Return the newly created nodeset after unlinking it from
12009 * they pseudo parent.
12010 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012011 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012012 *lst = cur;
12013 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012014#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012015 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12016 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12017 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012018 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12019 oldctxt->myDoc, cur);
12020 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012021#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012022 cur->parent = NULL;
12023 cur = cur->next;
12024 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012025 ctxt->myDoc->children->children = NULL;
12026 }
12027 if (ctxt->myDoc != NULL) {
12028 xmlFreeNode(ctxt->myDoc->children);
12029 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012030 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012031 }
12032
12033 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012034 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012035 ctxt->attsDefault = NULL;
12036 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012037 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012038 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012039 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012040 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012041
12042 return(ret);
12043}
12044
Daniel Veillard29b17482004-08-16 00:39:03 +000012045/**
12046 * xmlParseInNodeContext:
12047 * @node: the context node
12048 * @data: the input string
12049 * @datalen: the input string length in bytes
12050 * @options: a combination of xmlParserOption
12051 * @lst: the return value for the set of parsed nodes
12052 *
12053 * Parse a well-balanced chunk of an XML document
12054 * within the context (DTD, namespaces, etc ...) of the given node.
12055 *
12056 * The allowed sequence for the data is a Well Balanced Chunk defined by
12057 * the content production in the XML grammar:
12058 *
12059 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12060 *
12061 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12062 * error code otherwise
12063 */
12064xmlParserErrors
12065xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12066 int options, xmlNodePtr *lst) {
12067#ifdef SAX2
12068 xmlParserCtxtPtr ctxt;
12069 xmlDocPtr doc = NULL;
12070 xmlNodePtr fake, cur;
12071 int nsnr = 0;
12072
12073 xmlParserErrors ret = XML_ERR_OK;
12074
12075 /*
12076 * check all input parameters, grab the document
12077 */
12078 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12079 return(XML_ERR_INTERNAL_ERROR);
12080 switch (node->type) {
12081 case XML_ELEMENT_NODE:
12082 case XML_ATTRIBUTE_NODE:
12083 case XML_TEXT_NODE:
12084 case XML_CDATA_SECTION_NODE:
12085 case XML_ENTITY_REF_NODE:
12086 case XML_PI_NODE:
12087 case XML_COMMENT_NODE:
12088 case XML_DOCUMENT_NODE:
12089 case XML_HTML_DOCUMENT_NODE:
12090 break;
12091 default:
12092 return(XML_ERR_INTERNAL_ERROR);
12093
12094 }
12095 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12096 (node->type != XML_DOCUMENT_NODE) &&
12097 (node->type != XML_HTML_DOCUMENT_NODE))
12098 node = node->parent;
12099 if (node == NULL)
12100 return(XML_ERR_INTERNAL_ERROR);
12101 if (node->type == XML_ELEMENT_NODE)
12102 doc = node->doc;
12103 else
12104 doc = (xmlDocPtr) node;
12105 if (doc == NULL)
12106 return(XML_ERR_INTERNAL_ERROR);
12107
12108 /*
12109 * allocate a context and set-up everything not related to the
12110 * node position in the tree
12111 */
12112 if (doc->type == XML_DOCUMENT_NODE)
12113 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12114#ifdef LIBXML_HTML_ENABLED
12115 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12116 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12117#endif
12118 else
12119 return(XML_ERR_INTERNAL_ERROR);
12120
12121 if (ctxt == NULL)
12122 return(XML_ERR_NO_MEMORY);
12123 fake = xmlNewComment(NULL);
12124 if (fake == NULL) {
12125 xmlFreeParserCtxt(ctxt);
12126 return(XML_ERR_NO_MEMORY);
12127 }
12128 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012129
12130 /*
12131 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12132 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12133 * we must wait until the last moment to free the original one.
12134 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012135 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012136 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012137 xmlDictFree(ctxt->dict);
12138 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012139 } else
12140 options |= XML_PARSE_NODICT;
12141
12142 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012143 xmlDetectSAX2(ctxt);
12144 ctxt->myDoc = doc;
12145
12146 if (node->type == XML_ELEMENT_NODE) {
12147 nodePush(ctxt, node);
12148 /*
12149 * initialize the SAX2 namespaces stack
12150 */
12151 cur = node;
12152 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12153 xmlNsPtr ns = cur->nsDef;
12154 const xmlChar *iprefix, *ihref;
12155
12156 while (ns != NULL) {
12157 if (ctxt->dict) {
12158 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12159 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12160 } else {
12161 iprefix = ns->prefix;
12162 ihref = ns->href;
12163 }
12164
12165 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12166 nsPush(ctxt, iprefix, ihref);
12167 nsnr++;
12168 }
12169 ns = ns->next;
12170 }
12171 cur = cur->parent;
12172 }
12173 ctxt->instate = XML_PARSER_CONTENT;
12174 }
12175
12176 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12177 /*
12178 * ID/IDREF registration will be done in xmlValidateElement below
12179 */
12180 ctxt->loadsubset |= XML_SKIP_IDS;
12181 }
12182
Daniel Veillard499cc922006-01-18 17:22:35 +000012183#ifdef LIBXML_HTML_ENABLED
12184 if (doc->type == XML_HTML_DOCUMENT_NODE)
12185 __htmlParseContent(ctxt);
12186 else
12187#endif
12188 xmlParseContent(ctxt);
12189
Daniel Veillard29b17482004-08-16 00:39:03 +000012190 nsPop(ctxt, nsnr);
12191 if ((RAW == '<') && (NXT(1) == '/')) {
12192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12193 } else if (RAW != 0) {
12194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12195 }
12196 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12198 ctxt->wellFormed = 0;
12199 }
12200
12201 if (!ctxt->wellFormed) {
12202 if (ctxt->errNo == 0)
12203 ret = XML_ERR_INTERNAL_ERROR;
12204 else
12205 ret = (xmlParserErrors)ctxt->errNo;
12206 } else {
12207 ret = XML_ERR_OK;
12208 }
12209
12210 /*
12211 * Return the newly created nodeset after unlinking it from
12212 * the pseudo sibling.
12213 */
12214
12215 cur = fake->next;
12216 fake->next = NULL;
12217 node->last = fake;
12218
12219 if (cur != NULL) {
12220 cur->prev = NULL;
12221 }
12222
12223 *lst = cur;
12224
12225 while (cur != NULL) {
12226 cur->parent = NULL;
12227 cur = cur->next;
12228 }
12229
12230 xmlUnlinkNode(fake);
12231 xmlFreeNode(fake);
12232
12233
12234 if (ret != XML_ERR_OK) {
12235 xmlFreeNodeList(*lst);
12236 *lst = NULL;
12237 }
William M. Brackc3f81342004-10-03 01:22:44 +000012238
William M. Brackb7b54de2004-10-06 16:38:01 +000012239 if (doc->dict != NULL)
12240 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012241 xmlFreeParserCtxt(ctxt);
12242
12243 return(ret);
12244#else /* !SAX2 */
12245 return(XML_ERR_INTERNAL_ERROR);
12246#endif
12247}
12248
Daniel Veillard81273902003-09-30 00:43:48 +000012249#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012250/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012251 * xmlParseBalancedChunkMemoryRecover:
12252 * @doc: the document the chunk pertains to
12253 * @sax: the SAX handler bloc (possibly NULL)
12254 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12255 * @depth: Used for loop detection, use 0
12256 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12257 * @lst: the return value for the set of parsed nodes
12258 * @recover: return nodes even if the data is broken (use 0)
12259 *
12260 *
12261 * Parse a well-balanced chunk of an XML document
12262 * called by the parser
12263 * The allowed sequence for the Well Balanced Chunk is the one defined by
12264 * the content production in the XML grammar:
12265 *
12266 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12267 *
12268 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12269 * the parser error code otherwise
12270 *
12271 * In case recover is set to 1, the nodelist will not be empty even if
12272 * the parsed chunk is not well balanced.
12273 */
12274int
12275xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12276 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12277 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012278 xmlParserCtxtPtr ctxt;
12279 xmlDocPtr newDoc;
12280 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012281 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012282 int size;
12283 int ret = 0;
12284
12285 if (depth > 40) {
12286 return(XML_ERR_ENTITY_LOOP);
12287 }
12288
12289
Daniel Veillardcda96922001-08-21 10:56:31 +000012290 if (lst != NULL)
12291 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012292 if (string == NULL)
12293 return(-1);
12294
12295 size = xmlStrlen(string);
12296
12297 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12298 if (ctxt == NULL) return(-1);
12299 ctxt->userData = ctxt;
12300 if (sax != NULL) {
12301 oldsax = ctxt->sax;
12302 ctxt->sax = sax;
12303 if (user_data != NULL)
12304 ctxt->userData = user_data;
12305 }
12306 newDoc = xmlNewDoc(BAD_CAST "1.0");
12307 if (newDoc == NULL) {
12308 xmlFreeParserCtxt(ctxt);
12309 return(-1);
12310 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012311 if ((doc != NULL) && (doc->dict != NULL)) {
12312 xmlDictFree(ctxt->dict);
12313 ctxt->dict = doc->dict;
12314 xmlDictReference(ctxt->dict);
12315 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12316 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12317 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12318 ctxt->dictNames = 1;
12319 } else {
12320 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12321 }
Owen Taylor3473f882001-02-23 17:55:21 +000012322 if (doc != NULL) {
12323 newDoc->intSubset = doc->intSubset;
12324 newDoc->extSubset = doc->extSubset;
12325 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012326 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12327 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012328 if (sax != NULL)
12329 ctxt->sax = oldsax;
12330 xmlFreeParserCtxt(ctxt);
12331 newDoc->intSubset = NULL;
12332 newDoc->extSubset = NULL;
12333 xmlFreeDoc(newDoc);
12334 return(-1);
12335 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012336 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12337 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012338 if (doc == NULL) {
12339 ctxt->myDoc = newDoc;
12340 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012341 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012342 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012343 /* Ensure that doc has XML spec namespace */
12344 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12345 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012346 }
12347 ctxt->instate = XML_PARSER_CONTENT;
12348 ctxt->depth = depth;
12349
12350 /*
12351 * Doing validity checking on chunk doesn't make sense
12352 */
12353 ctxt->validate = 0;
12354 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012355 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012356
Daniel Veillardb39bc392002-10-26 19:29:51 +000012357 if ( doc != NULL ){
12358 content = doc->children;
12359 doc->children = NULL;
12360 xmlParseContent(ctxt);
12361 doc->children = content;
12362 }
12363 else {
12364 xmlParseContent(ctxt);
12365 }
Owen Taylor3473f882001-02-23 17:55:21 +000012366 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012367 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012368 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012369 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012370 }
12371 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012372 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012373 }
12374
12375 if (!ctxt->wellFormed) {
12376 if (ctxt->errNo == 0)
12377 ret = 1;
12378 else
12379 ret = ctxt->errNo;
12380 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012381 ret = 0;
12382 }
12383
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012384 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12385 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012386
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012387 /*
12388 * Return the newly created nodeset after unlinking it from
12389 * they pseudo parent.
12390 */
12391 cur = newDoc->children->children;
12392 *lst = cur;
12393 while (cur != NULL) {
12394 xmlSetTreeDoc(cur, doc);
12395 cur->parent = NULL;
12396 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012397 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012398 newDoc->children->children = NULL;
12399 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012400
Owen Taylor3473f882001-02-23 17:55:21 +000012401 if (sax != NULL)
12402 ctxt->sax = oldsax;
12403 xmlFreeParserCtxt(ctxt);
12404 newDoc->intSubset = NULL;
12405 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012406 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012407 xmlFreeDoc(newDoc);
12408
12409 return(ret);
12410}
12411
12412/**
12413 * xmlSAXParseEntity:
12414 * @sax: the SAX handler block
12415 * @filename: the filename
12416 *
12417 * parse an XML external entity out of context and build a tree.
12418 * It use the given SAX function block to handle the parsing callback.
12419 * If sax is NULL, fallback to the default DOM tree building routines.
12420 *
12421 * [78] extParsedEnt ::= TextDecl? content
12422 *
12423 * This correspond to a "Well Balanced" chunk
12424 *
12425 * Returns the resulting document tree
12426 */
12427
12428xmlDocPtr
12429xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12430 xmlDocPtr ret;
12431 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012432
12433 ctxt = xmlCreateFileParserCtxt(filename);
12434 if (ctxt == NULL) {
12435 return(NULL);
12436 }
12437 if (sax != NULL) {
12438 if (ctxt->sax != NULL)
12439 xmlFree(ctxt->sax);
12440 ctxt->sax = sax;
12441 ctxt->userData = NULL;
12442 }
12443
Owen Taylor3473f882001-02-23 17:55:21 +000012444 xmlParseExtParsedEnt(ctxt);
12445
12446 if (ctxt->wellFormed)
12447 ret = ctxt->myDoc;
12448 else {
12449 ret = NULL;
12450 xmlFreeDoc(ctxt->myDoc);
12451 ctxt->myDoc = NULL;
12452 }
12453 if (sax != NULL)
12454 ctxt->sax = NULL;
12455 xmlFreeParserCtxt(ctxt);
12456
12457 return(ret);
12458}
12459
12460/**
12461 * xmlParseEntity:
12462 * @filename: the filename
12463 *
12464 * parse an XML external entity out of context and build a tree.
12465 *
12466 * [78] extParsedEnt ::= TextDecl? content
12467 *
12468 * This correspond to a "Well Balanced" chunk
12469 *
12470 * Returns the resulting document tree
12471 */
12472
12473xmlDocPtr
12474xmlParseEntity(const char *filename) {
12475 return(xmlSAXParseEntity(NULL, filename));
12476}
Daniel Veillard81273902003-09-30 00:43:48 +000012477#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012478
12479/**
12480 * xmlCreateEntityParserCtxt:
12481 * @URL: the entity URL
12482 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012483 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012484 *
12485 * Create a parser context for an external entity
12486 * Automatic support for ZLIB/Compress compressed document is provided
12487 * by default if found at compile-time.
12488 *
12489 * Returns the new parser context or NULL
12490 */
12491xmlParserCtxtPtr
12492xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12493 const xmlChar *base) {
12494 xmlParserCtxtPtr ctxt;
12495 xmlParserInputPtr inputStream;
12496 char *directory = NULL;
12497 xmlChar *uri;
12498
12499 ctxt = xmlNewParserCtxt();
12500 if (ctxt == NULL) {
12501 return(NULL);
12502 }
12503
12504 uri = xmlBuildURI(URL, base);
12505
12506 if (uri == NULL) {
12507 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12508 if (inputStream == NULL) {
12509 xmlFreeParserCtxt(ctxt);
12510 return(NULL);
12511 }
12512
12513 inputPush(ctxt, inputStream);
12514
12515 if ((ctxt->directory == NULL) && (directory == NULL))
12516 directory = xmlParserGetDirectory((char *)URL);
12517 if ((ctxt->directory == NULL) && (directory != NULL))
12518 ctxt->directory = directory;
12519 } else {
12520 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12521 if (inputStream == NULL) {
12522 xmlFree(uri);
12523 xmlFreeParserCtxt(ctxt);
12524 return(NULL);
12525 }
12526
12527 inputPush(ctxt, inputStream);
12528
12529 if ((ctxt->directory == NULL) && (directory == NULL))
12530 directory = xmlParserGetDirectory((char *)uri);
12531 if ((ctxt->directory == NULL) && (directory != NULL))
12532 ctxt->directory = directory;
12533 xmlFree(uri);
12534 }
Owen Taylor3473f882001-02-23 17:55:21 +000012535 return(ctxt);
12536}
12537
12538/************************************************************************
12539 * *
12540 * Front ends when parsing from a file *
12541 * *
12542 ************************************************************************/
12543
12544/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012545 * xmlCreateURLParserCtxt:
12546 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012547 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012548 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012549 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012550 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012551 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012552 *
12553 * Returns the new parser context or NULL
12554 */
12555xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012556xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012557{
12558 xmlParserCtxtPtr ctxt;
12559 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012560 char *directory = NULL;
12561
Owen Taylor3473f882001-02-23 17:55:21 +000012562 ctxt = xmlNewParserCtxt();
12563 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012564 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012565 return(NULL);
12566 }
12567
Daniel Veillarddf292f72005-01-16 19:00:15 +000012568 if (options)
12569 xmlCtxtUseOptions(ctxt, options);
12570 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012571
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012572 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012573 if (inputStream == NULL) {
12574 xmlFreeParserCtxt(ctxt);
12575 return(NULL);
12576 }
12577
Owen Taylor3473f882001-02-23 17:55:21 +000012578 inputPush(ctxt, inputStream);
12579 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012580 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012581 if ((ctxt->directory == NULL) && (directory != NULL))
12582 ctxt->directory = directory;
12583
12584 return(ctxt);
12585}
12586
Daniel Veillard61b93382003-11-03 14:28:31 +000012587/**
12588 * xmlCreateFileParserCtxt:
12589 * @filename: the filename
12590 *
12591 * Create a parser context for a file content.
12592 * Automatic support for ZLIB/Compress compressed document is provided
12593 * by default if found at compile-time.
12594 *
12595 * Returns the new parser context or NULL
12596 */
12597xmlParserCtxtPtr
12598xmlCreateFileParserCtxt(const char *filename)
12599{
12600 return(xmlCreateURLParserCtxt(filename, 0));
12601}
12602
Daniel Veillard81273902003-09-30 00:43:48 +000012603#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012604/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012605 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012606 * @sax: the SAX handler block
12607 * @filename: the filename
12608 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12609 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012610 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012611 *
12612 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12613 * compressed document is provided by default if found at compile-time.
12614 * It use the given SAX function block to handle the parsing callback.
12615 * If sax is NULL, fallback to the default DOM tree building routines.
12616 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012617 * User data (void *) is stored within the parser context in the
12618 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012619 *
Owen Taylor3473f882001-02-23 17:55:21 +000012620 * Returns the resulting document tree
12621 */
12622
12623xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012624xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12625 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012626 xmlDocPtr ret;
12627 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012628
Daniel Veillard635ef722001-10-29 11:48:19 +000012629 xmlInitParser();
12630
Owen Taylor3473f882001-02-23 17:55:21 +000012631 ctxt = xmlCreateFileParserCtxt(filename);
12632 if (ctxt == NULL) {
12633 return(NULL);
12634 }
12635 if (sax != NULL) {
12636 if (ctxt->sax != NULL)
12637 xmlFree(ctxt->sax);
12638 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012639 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012640 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012641 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012642 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012643 }
Owen Taylor3473f882001-02-23 17:55:21 +000012644
Daniel Veillard37d2d162008-03-14 10:54:00 +000012645 if (ctxt->directory == NULL)
12646 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012647
Daniel Veillarddad3f682002-11-17 16:47:27 +000012648 ctxt->recovery = recovery;
12649
Owen Taylor3473f882001-02-23 17:55:21 +000012650 xmlParseDocument(ctxt);
12651
William M. Brackc07329e2003-09-08 01:57:30 +000012652 if ((ctxt->wellFormed) || recovery) {
12653 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012654 if (ret != NULL) {
12655 if (ctxt->input->buf->compressed > 0)
12656 ret->compression = 9;
12657 else
12658 ret->compression = ctxt->input->buf->compressed;
12659 }
William M. Brackc07329e2003-09-08 01:57:30 +000012660 }
Owen Taylor3473f882001-02-23 17:55:21 +000012661 else {
12662 ret = NULL;
12663 xmlFreeDoc(ctxt->myDoc);
12664 ctxt->myDoc = NULL;
12665 }
12666 if (sax != NULL)
12667 ctxt->sax = NULL;
12668 xmlFreeParserCtxt(ctxt);
12669
12670 return(ret);
12671}
12672
12673/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012674 * xmlSAXParseFile:
12675 * @sax: the SAX handler block
12676 * @filename: the filename
12677 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12678 * documents
12679 *
12680 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12681 * compressed document is provided by default if found at compile-time.
12682 * It use the given SAX function block to handle the parsing callback.
12683 * If sax is NULL, fallback to the default DOM tree building routines.
12684 *
12685 * Returns the resulting document tree
12686 */
12687
12688xmlDocPtr
12689xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12690 int recovery) {
12691 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12692}
12693
12694/**
Owen Taylor3473f882001-02-23 17:55:21 +000012695 * xmlRecoverDoc:
12696 * @cur: a pointer to an array of xmlChar
12697 *
12698 * parse an XML in-memory document and build a tree.
12699 * In the case the document is not Well Formed, a tree is built anyway
12700 *
12701 * Returns the resulting document tree
12702 */
12703
12704xmlDocPtr
12705xmlRecoverDoc(xmlChar *cur) {
12706 return(xmlSAXParseDoc(NULL, cur, 1));
12707}
12708
12709/**
12710 * xmlParseFile:
12711 * @filename: the filename
12712 *
12713 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12714 * compressed document is provided by default if found at compile-time.
12715 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012716 * Returns the resulting document tree if the file was wellformed,
12717 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012718 */
12719
12720xmlDocPtr
12721xmlParseFile(const char *filename) {
12722 return(xmlSAXParseFile(NULL, filename, 0));
12723}
12724
12725/**
12726 * xmlRecoverFile:
12727 * @filename: the filename
12728 *
12729 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12730 * compressed document is provided by default if found at compile-time.
12731 * In the case the document is not Well Formed, a tree is built anyway
12732 *
12733 * Returns the resulting document tree
12734 */
12735
12736xmlDocPtr
12737xmlRecoverFile(const char *filename) {
12738 return(xmlSAXParseFile(NULL, filename, 1));
12739}
12740
12741
12742/**
12743 * xmlSetupParserForBuffer:
12744 * @ctxt: an XML parser context
12745 * @buffer: a xmlChar * buffer
12746 * @filename: a file name
12747 *
12748 * Setup the parser context to parse a new buffer; Clears any prior
12749 * contents from the parser context. The buffer parameter must not be
12750 * NULL, but the filename parameter can be
12751 */
12752void
12753xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12754 const char* filename)
12755{
12756 xmlParserInputPtr input;
12757
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012758 if ((ctxt == NULL) || (buffer == NULL))
12759 return;
12760
Owen Taylor3473f882001-02-23 17:55:21 +000012761 input = xmlNewInputStream(ctxt);
12762 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012763 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012764 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012765 return;
12766 }
12767
12768 xmlClearParserCtxt(ctxt);
12769 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012770 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012771 input->base = buffer;
12772 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012773 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012774 inputPush(ctxt, input);
12775}
12776
12777/**
12778 * xmlSAXUserParseFile:
12779 * @sax: a SAX handler
12780 * @user_data: The user data returned on SAX callbacks
12781 * @filename: a file name
12782 *
12783 * parse an XML file and call the given SAX handler routines.
12784 * Automatic support for ZLIB/Compress compressed document is provided
12785 *
12786 * Returns 0 in case of success or a error number otherwise
12787 */
12788int
12789xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12790 const char *filename) {
12791 int ret = 0;
12792 xmlParserCtxtPtr ctxt;
12793
12794 ctxt = xmlCreateFileParserCtxt(filename);
12795 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012796 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012797 xmlFree(ctxt->sax);
12798 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012799 xmlDetectSAX2(ctxt);
12800
Owen Taylor3473f882001-02-23 17:55:21 +000012801 if (user_data != NULL)
12802 ctxt->userData = user_data;
12803
12804 xmlParseDocument(ctxt);
12805
12806 if (ctxt->wellFormed)
12807 ret = 0;
12808 else {
12809 if (ctxt->errNo != 0)
12810 ret = ctxt->errNo;
12811 else
12812 ret = -1;
12813 }
12814 if (sax != NULL)
12815 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012816 if (ctxt->myDoc != NULL) {
12817 xmlFreeDoc(ctxt->myDoc);
12818 ctxt->myDoc = NULL;
12819 }
Owen Taylor3473f882001-02-23 17:55:21 +000012820 xmlFreeParserCtxt(ctxt);
12821
12822 return ret;
12823}
Daniel Veillard81273902003-09-30 00:43:48 +000012824#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012825
12826/************************************************************************
12827 * *
12828 * Front ends when parsing from memory *
12829 * *
12830 ************************************************************************/
12831
12832/**
12833 * xmlCreateMemoryParserCtxt:
12834 * @buffer: a pointer to a char array
12835 * @size: the size of the array
12836 *
12837 * Create a parser context for an XML in-memory document.
12838 *
12839 * Returns the new parser context or NULL
12840 */
12841xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012842xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012843 xmlParserCtxtPtr ctxt;
12844 xmlParserInputPtr input;
12845 xmlParserInputBufferPtr buf;
12846
12847 if (buffer == NULL)
12848 return(NULL);
12849 if (size <= 0)
12850 return(NULL);
12851
12852 ctxt = xmlNewParserCtxt();
12853 if (ctxt == NULL)
12854 return(NULL);
12855
Daniel Veillard53350552003-09-18 13:35:51 +000012856 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012857 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012858 if (buf == NULL) {
12859 xmlFreeParserCtxt(ctxt);
12860 return(NULL);
12861 }
Owen Taylor3473f882001-02-23 17:55:21 +000012862
12863 input = xmlNewInputStream(ctxt);
12864 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012865 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012866 xmlFreeParserCtxt(ctxt);
12867 return(NULL);
12868 }
12869
12870 input->filename = NULL;
12871 input->buf = buf;
12872 input->base = input->buf->buffer->content;
12873 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012874 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012875
12876 inputPush(ctxt, input);
12877 return(ctxt);
12878}
12879
Daniel Veillard81273902003-09-30 00:43:48 +000012880#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012881/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012882 * xmlSAXParseMemoryWithData:
12883 * @sax: the SAX handler block
12884 * @buffer: an pointer to a char array
12885 * @size: the size of the array
12886 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12887 * documents
12888 * @data: the userdata
12889 *
12890 * parse an XML in-memory block and use the given SAX function block
12891 * to handle the parsing callback. If sax is NULL, fallback to the default
12892 * DOM tree building routines.
12893 *
12894 * User data (void *) is stored within the parser context in the
12895 * context's _private member, so it is available nearly everywhere in libxml
12896 *
12897 * Returns the resulting document tree
12898 */
12899
12900xmlDocPtr
12901xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12902 int size, int recovery, void *data) {
12903 xmlDocPtr ret;
12904 xmlParserCtxtPtr ctxt;
12905
12906 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12907 if (ctxt == NULL) return(NULL);
12908 if (sax != NULL) {
12909 if (ctxt->sax != NULL)
12910 xmlFree(ctxt->sax);
12911 ctxt->sax = sax;
12912 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012913 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012914 if (data!=NULL) {
12915 ctxt->_private=data;
12916 }
12917
Daniel Veillardadba5f12003-04-04 16:09:01 +000012918 ctxt->recovery = recovery;
12919
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012920 xmlParseDocument(ctxt);
12921
12922 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12923 else {
12924 ret = NULL;
12925 xmlFreeDoc(ctxt->myDoc);
12926 ctxt->myDoc = NULL;
12927 }
12928 if (sax != NULL)
12929 ctxt->sax = NULL;
12930 xmlFreeParserCtxt(ctxt);
12931
12932 return(ret);
12933}
12934
12935/**
Owen Taylor3473f882001-02-23 17:55:21 +000012936 * xmlSAXParseMemory:
12937 * @sax: the SAX handler block
12938 * @buffer: an pointer to a char array
12939 * @size: the size of the array
12940 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12941 * documents
12942 *
12943 * parse an XML in-memory block and use the given SAX function block
12944 * to handle the parsing callback. If sax is NULL, fallback to the default
12945 * DOM tree building routines.
12946 *
12947 * Returns the resulting document tree
12948 */
12949xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012950xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12951 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012952 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012953}
12954
12955/**
12956 * xmlParseMemory:
12957 * @buffer: an pointer to a char array
12958 * @size: the size of the array
12959 *
12960 * parse an XML in-memory block and build a tree.
12961 *
12962 * Returns the resulting document tree
12963 */
12964
Daniel Veillard50822cb2001-07-26 20:05:51 +000012965xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012966 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12967}
12968
12969/**
12970 * xmlRecoverMemory:
12971 * @buffer: an pointer to a char array
12972 * @size: the size of the array
12973 *
12974 * parse an XML in-memory block and build a tree.
12975 * In the case the document is not Well Formed, a tree is built anyway
12976 *
12977 * Returns the resulting document tree
12978 */
12979
Daniel Veillard50822cb2001-07-26 20:05:51 +000012980xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012981 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12982}
12983
12984/**
12985 * xmlSAXUserParseMemory:
12986 * @sax: a SAX handler
12987 * @user_data: The user data returned on SAX callbacks
12988 * @buffer: an in-memory XML document input
12989 * @size: the length of the XML document in bytes
12990 *
12991 * A better SAX parsing routine.
12992 * parse an XML in-memory buffer and call the given SAX handler routines.
12993 *
12994 * Returns 0 in case of success or a error number otherwise
12995 */
12996int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012997 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012998 int ret = 0;
12999 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013000
13001 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13002 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013003 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13004 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013005 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013006 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013007
Daniel Veillard30211a02001-04-26 09:33:18 +000013008 if (user_data != NULL)
13009 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013010
13011 xmlParseDocument(ctxt);
13012
13013 if (ctxt->wellFormed)
13014 ret = 0;
13015 else {
13016 if (ctxt->errNo != 0)
13017 ret = ctxt->errNo;
13018 else
13019 ret = -1;
13020 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013021 if (sax != NULL)
13022 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013023 if (ctxt->myDoc != NULL) {
13024 xmlFreeDoc(ctxt->myDoc);
13025 ctxt->myDoc = NULL;
13026 }
Owen Taylor3473f882001-02-23 17:55:21 +000013027 xmlFreeParserCtxt(ctxt);
13028
13029 return ret;
13030}
Daniel Veillard81273902003-09-30 00:43:48 +000013031#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013032
13033/**
13034 * xmlCreateDocParserCtxt:
13035 * @cur: a pointer to an array of xmlChar
13036 *
13037 * Creates a parser context for an XML in-memory document.
13038 *
13039 * Returns the new parser context or NULL
13040 */
13041xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013042xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013043 int len;
13044
13045 if (cur == NULL)
13046 return(NULL);
13047 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013048 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013049}
13050
Daniel Veillard81273902003-09-30 00:43:48 +000013051#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013052/**
13053 * xmlSAXParseDoc:
13054 * @sax: the SAX handler block
13055 * @cur: a pointer to an array of xmlChar
13056 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13057 * documents
13058 *
13059 * parse an XML in-memory document and build a tree.
13060 * It use the given SAX function block to handle the parsing callback.
13061 * If sax is NULL, fallback to the default DOM tree building routines.
13062 *
13063 * Returns the resulting document tree
13064 */
13065
13066xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013067xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013068 xmlDocPtr ret;
13069 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013070 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013071
Daniel Veillard38936062004-11-04 17:45:11 +000013072 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013073
13074
13075 ctxt = xmlCreateDocParserCtxt(cur);
13076 if (ctxt == NULL) return(NULL);
13077 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013078 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013079 ctxt->sax = sax;
13080 ctxt->userData = NULL;
13081 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013082 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013083
13084 xmlParseDocument(ctxt);
13085 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13086 else {
13087 ret = NULL;
13088 xmlFreeDoc(ctxt->myDoc);
13089 ctxt->myDoc = NULL;
13090 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013091 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013092 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013093 xmlFreeParserCtxt(ctxt);
13094
13095 return(ret);
13096}
13097
13098/**
13099 * xmlParseDoc:
13100 * @cur: a pointer to an array of xmlChar
13101 *
13102 * parse an XML in-memory document and build a tree.
13103 *
13104 * Returns the resulting document tree
13105 */
13106
13107xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013108xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013109 return(xmlSAXParseDoc(NULL, cur, 0));
13110}
Daniel Veillard81273902003-09-30 00:43:48 +000013111#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013112
Daniel Veillard81273902003-09-30 00:43:48 +000013113#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013114/************************************************************************
13115 * *
13116 * Specific function to keep track of entities references *
13117 * and used by the XSLT debugger *
13118 * *
13119 ************************************************************************/
13120
13121static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13122
13123/**
13124 * xmlAddEntityReference:
13125 * @ent : A valid entity
13126 * @firstNode : A valid first node for children of entity
13127 * @lastNode : A valid last node of children entity
13128 *
13129 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13130 */
13131static void
13132xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13133 xmlNodePtr lastNode)
13134{
13135 if (xmlEntityRefFunc != NULL) {
13136 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13137 }
13138}
13139
13140
13141/**
13142 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013143 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013144 *
13145 * Set the function to call call back when a xml reference has been made
13146 */
13147void
13148xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13149{
13150 xmlEntityRefFunc = func;
13151}
Daniel Veillard81273902003-09-30 00:43:48 +000013152#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013153
13154/************************************************************************
13155 * *
13156 * Miscellaneous *
13157 * *
13158 ************************************************************************/
13159
13160#ifdef LIBXML_XPATH_ENABLED
13161#include <libxml/xpath.h>
13162#endif
13163
Daniel Veillardffa3c742005-07-21 13:24:09 +000013164extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013165static int xmlParserInitialized = 0;
13166
13167/**
13168 * xmlInitParser:
13169 *
13170 * Initialization function for the XML parser.
13171 * This is not reentrant. Call once before processing in case of
13172 * use in multithreaded programs.
13173 */
13174
13175void
13176xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013177 if (xmlParserInitialized != 0)
13178 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013179
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013180#ifdef LIBXML_THREAD_ENABLED
13181 __xmlGlobalInitMutexLock();
13182 if (xmlParserInitialized == 0) {
13183#endif
13184 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13185 (xmlGenericError == NULL))
13186 initGenericErrorDefaultFunc(NULL);
13187 xmlInitGlobals();
13188 xmlInitThreads();
13189 xmlInitMemory();
13190 xmlInitCharEncodingHandlers();
13191 xmlDefaultSAXHandlerInit();
13192 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013193#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013194 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013195#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013196#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013197 htmlInitAutoClose();
13198 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013199#endif
13200#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013201 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013202#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013203 xmlParserInitialized = 1;
13204#ifdef LIBXML_THREAD_ENABLED
13205 }
13206 __xmlGlobalInitMutexUnlock();
13207#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013208}
13209
13210/**
13211 * xmlCleanupParser:
13212 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013213 * This function name is somewhat misleading. It does not clean up
13214 * parser state, it cleans up memory allocated by the library itself.
13215 * It is a cleanup function for the XML library. It tries to reclaim all
13216 * related global memory allocated for the library processing.
13217 * It doesn't deallocate any document related memory. One should
13218 * call xmlCleanupParser() only when the process has finished using
13219 * the library and all XML/HTML documents built with it.
13220 * See also xmlInitParser() which has the opposite function of preparing
13221 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013222 */
13223
13224void
13225xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013226 if (!xmlParserInitialized)
13227 return;
13228
Owen Taylor3473f882001-02-23 17:55:21 +000013229 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013230#ifdef LIBXML_CATALOG_ENABLED
13231 xmlCatalogCleanup();
13232#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013233 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013234 xmlCleanupInputCallbacks();
13235#ifdef LIBXML_OUTPUT_ENABLED
13236 xmlCleanupOutputCallbacks();
13237#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013238#ifdef LIBXML_SCHEMAS_ENABLED
13239 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013240 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013241#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013242 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013243 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013244 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013245 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013246 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013247}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013248
13249/************************************************************************
13250 * *
13251 * New set (2.6.0) of simpler and more flexible APIs *
13252 * *
13253 ************************************************************************/
13254
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL @str is left alone.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement: it can be used as the body of an if/else, and the
 * caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13266
13267/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013268 * xmlCtxtReset:
13269 * @ctxt: an XML parser context
13270 *
13271 * Reset a parser context
13272 */
13273void
13274xmlCtxtReset(xmlParserCtxtPtr ctxt)
13275{
13276 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013277 xmlDictPtr dict;
13278
13279 if (ctxt == NULL)
13280 return;
13281
13282 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013283
13284 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13285 xmlFreeInputStream(input);
13286 }
13287 ctxt->inputNr = 0;
13288 ctxt->input = NULL;
13289
13290 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013291 if (ctxt->spaceTab != NULL) {
13292 ctxt->spaceTab[0] = -1;
13293 ctxt->space = &ctxt->spaceTab[0];
13294 } else {
13295 ctxt->space = NULL;
13296 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013297
13298
13299 ctxt->nodeNr = 0;
13300 ctxt->node = NULL;
13301
13302 ctxt->nameNr = 0;
13303 ctxt->name = NULL;
13304
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013305 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013306 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013307 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013308 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013309 DICT_FREE(ctxt->directory);
13310 ctxt->directory = NULL;
13311 DICT_FREE(ctxt->extSubURI);
13312 ctxt->extSubURI = NULL;
13313 DICT_FREE(ctxt->extSubSystem);
13314 ctxt->extSubSystem = NULL;
13315 if (ctxt->myDoc != NULL)
13316 xmlFreeDoc(ctxt->myDoc);
13317 ctxt->myDoc = NULL;
13318
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013319 ctxt->standalone = -1;
13320 ctxt->hasExternalSubset = 0;
13321 ctxt->hasPErefs = 0;
13322 ctxt->html = 0;
13323 ctxt->external = 0;
13324 ctxt->instate = XML_PARSER_START;
13325 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013326
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327 ctxt->wellFormed = 1;
13328 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013329 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013330 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013331#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013332 ctxt->vctxt.userData = ctxt;
13333 ctxt->vctxt.error = xmlParserValidityError;
13334 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013335#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013336 ctxt->record_info = 0;
13337 ctxt->nbChars = 0;
13338 ctxt->checkIndex = 0;
13339 ctxt->inSubset = 0;
13340 ctxt->errNo = XML_ERR_OK;
13341 ctxt->depth = 0;
13342 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13343 ctxt->catalogs = NULL;
13344 xmlInitNodeInfoSeq(&ctxt->node_seq);
13345
13346 if (ctxt->attsDefault != NULL) {
13347 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13348 ctxt->attsDefault = NULL;
13349 }
13350 if (ctxt->attsSpecial != NULL) {
13351 xmlHashFree(ctxt->attsSpecial, NULL);
13352 ctxt->attsSpecial = NULL;
13353 }
13354
Daniel Veillard4432df22003-09-28 18:58:27 +000013355#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013356 if (ctxt->catalogs != NULL)
13357 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013358#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013359 if (ctxt->lastError.code != XML_ERR_OK)
13360 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013361}
13362
13363/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013364 * xmlCtxtResetPush:
13365 * @ctxt: an XML parser context
13366 * @chunk: a pointer to an array of chars
13367 * @size: number of chars in the array
13368 * @filename: an optional file name or URI
13369 * @encoding: the document encoding, or NULL
13370 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013371 * Reset a push parser context
13372 *
13373 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013374 */
13375int
13376xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13377 int size, const char *filename, const char *encoding)
13378{
13379 xmlParserInputPtr inputStream;
13380 xmlParserInputBufferPtr buf;
13381 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13382
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013383 if (ctxt == NULL)
13384 return(1);
13385
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013386 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13387 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13388
13389 buf = xmlAllocParserInputBuffer(enc);
13390 if (buf == NULL)
13391 return(1);
13392
13393 if (ctxt == NULL) {
13394 xmlFreeParserInputBuffer(buf);
13395 return(1);
13396 }
13397
13398 xmlCtxtReset(ctxt);
13399
13400 if (ctxt->pushTab == NULL) {
13401 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13402 sizeof(xmlChar *));
13403 if (ctxt->pushTab == NULL) {
13404 xmlErrMemory(ctxt, NULL);
13405 xmlFreeParserInputBuffer(buf);
13406 return(1);
13407 }
13408 }
13409
13410 if (filename == NULL) {
13411 ctxt->directory = NULL;
13412 } else {
13413 ctxt->directory = xmlParserGetDirectory(filename);
13414 }
13415
13416 inputStream = xmlNewInputStream(ctxt);
13417 if (inputStream == NULL) {
13418 xmlFreeParserInputBuffer(buf);
13419 return(1);
13420 }
13421
13422 if (filename == NULL)
13423 inputStream->filename = NULL;
13424 else
13425 inputStream->filename = (char *)
13426 xmlCanonicPath((const xmlChar *) filename);
13427 inputStream->buf = buf;
13428 inputStream->base = inputStream->buf->buffer->content;
13429 inputStream->cur = inputStream->buf->buffer->content;
13430 inputStream->end =
13431 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13432
13433 inputPush(ctxt, inputStream);
13434
13435 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13436 (ctxt->input->buf != NULL)) {
13437 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13438 int cur = ctxt->input->cur - ctxt->input->base;
13439
13440 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13441
13442 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13443 ctxt->input->cur = ctxt->input->base + cur;
13444 ctxt->input->end =
13445 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13446 use];
13447#ifdef DEBUG_PUSH
13448 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13449#endif
13450 }
13451
13452 if (encoding != NULL) {
13453 xmlCharEncodingHandlerPtr hdlr;
13454
13455 hdlr = xmlFindCharEncodingHandler(encoding);
13456 if (hdlr != NULL) {
13457 xmlSwitchToEncoding(ctxt, hdlr);
13458 } else {
13459 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13460 "Unsupported encoding %s\n", BAD_CAST encoding);
13461 }
13462 } else if (enc != XML_CHAR_ENCODING_NONE) {
13463 xmlSwitchEncoding(ctxt, enc);
13464 }
13465
13466 return(0);
13467}
13468
13469/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013470 * xmlCtxtUseOptions:
13471 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013472 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013473 *
13474 * Applies the options to the parser context
13475 *
13476 * Returns 0 in case of success, the set of unknown or unimplemented options
13477 * in case of error.
13478 */
13479int
13480xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13481{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013482 if (ctxt == NULL)
13483 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013484 if (options & XML_PARSE_RECOVER) {
13485 ctxt->recovery = 1;
13486 options -= XML_PARSE_RECOVER;
13487 } else
13488 ctxt->recovery = 0;
13489 if (options & XML_PARSE_DTDLOAD) {
13490 ctxt->loadsubset = XML_DETECT_IDS;
13491 options -= XML_PARSE_DTDLOAD;
13492 } else
13493 ctxt->loadsubset = 0;
13494 if (options & XML_PARSE_DTDATTR) {
13495 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13496 options -= XML_PARSE_DTDATTR;
13497 }
13498 if (options & XML_PARSE_NOENT) {
13499 ctxt->replaceEntities = 1;
13500 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13501 options -= XML_PARSE_NOENT;
13502 } else
13503 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013504 if (options & XML_PARSE_PEDANTIC) {
13505 ctxt->pedantic = 1;
13506 options -= XML_PARSE_PEDANTIC;
13507 } else
13508 ctxt->pedantic = 0;
13509 if (options & XML_PARSE_NOBLANKS) {
13510 ctxt->keepBlanks = 0;
13511 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13512 options -= XML_PARSE_NOBLANKS;
13513 } else
13514 ctxt->keepBlanks = 1;
13515 if (options & XML_PARSE_DTDVALID) {
13516 ctxt->validate = 1;
13517 if (options & XML_PARSE_NOWARNING)
13518 ctxt->vctxt.warning = NULL;
13519 if (options & XML_PARSE_NOERROR)
13520 ctxt->vctxt.error = NULL;
13521 options -= XML_PARSE_DTDVALID;
13522 } else
13523 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013524 if (options & XML_PARSE_NOWARNING) {
13525 ctxt->sax->warning = NULL;
13526 options -= XML_PARSE_NOWARNING;
13527 }
13528 if (options & XML_PARSE_NOERROR) {
13529 ctxt->sax->error = NULL;
13530 ctxt->sax->fatalError = NULL;
13531 options -= XML_PARSE_NOERROR;
13532 }
Daniel Veillard81273902003-09-30 00:43:48 +000013533#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013534 if (options & XML_PARSE_SAX1) {
13535 ctxt->sax->startElement = xmlSAX2StartElement;
13536 ctxt->sax->endElement = xmlSAX2EndElement;
13537 ctxt->sax->startElementNs = NULL;
13538 ctxt->sax->endElementNs = NULL;
13539 ctxt->sax->initialized = 1;
13540 options -= XML_PARSE_SAX1;
13541 }
Daniel Veillard81273902003-09-30 00:43:48 +000013542#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013543 if (options & XML_PARSE_NODICT) {
13544 ctxt->dictNames = 0;
13545 options -= XML_PARSE_NODICT;
13546 } else {
13547 ctxt->dictNames = 1;
13548 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013549 if (options & XML_PARSE_NOCDATA) {
13550 ctxt->sax->cdataBlock = NULL;
13551 options -= XML_PARSE_NOCDATA;
13552 }
13553 if (options & XML_PARSE_NSCLEAN) {
13554 ctxt->options |= XML_PARSE_NSCLEAN;
13555 options -= XML_PARSE_NSCLEAN;
13556 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013557 if (options & XML_PARSE_NONET) {
13558 ctxt->options |= XML_PARSE_NONET;
13559 options -= XML_PARSE_NONET;
13560 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013561 if (options & XML_PARSE_COMPACT) {
13562 ctxt->options |= XML_PARSE_COMPACT;
13563 options -= XML_PARSE_COMPACT;
13564 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013565 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013566 return (options);
13567}
13568
13569/**
13570 * xmlDoRead:
13571 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013572 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013573 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013574 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013575 * @reuse: keep the context for reuse
13576 *
13577 * Common front-end for the xmlRead functions
13578 *
13579 * Returns the resulting document tree or NULL
13580 */
13581static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013582xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13583 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013584{
13585 xmlDocPtr ret;
13586
13587 xmlCtxtUseOptions(ctxt, options);
13588 if (encoding != NULL) {
13589 xmlCharEncodingHandlerPtr hdlr;
13590
13591 hdlr = xmlFindCharEncodingHandler(encoding);
13592 if (hdlr != NULL)
13593 xmlSwitchToEncoding(ctxt, hdlr);
13594 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013595 if ((URL != NULL) && (ctxt->input != NULL) &&
13596 (ctxt->input->filename == NULL))
13597 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013598 xmlParseDocument(ctxt);
13599 if ((ctxt->wellFormed) || ctxt->recovery)
13600 ret = ctxt->myDoc;
13601 else {
13602 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013603 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013604 xmlFreeDoc(ctxt->myDoc);
13605 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013606 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013607 ctxt->myDoc = NULL;
13608 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013609 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013610 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013611
13612 return (ret);
13613}
13614
13615/**
13616 * xmlReadDoc:
13617 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013618 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013619 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013620 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013621 *
13622 * parse an XML in-memory document and build a tree.
13623 *
13624 * Returns the resulting document tree
13625 */
13626xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013627xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013628{
13629 xmlParserCtxtPtr ctxt;
13630
13631 if (cur == NULL)
13632 return (NULL);
13633
13634 ctxt = xmlCreateDocParserCtxt(cur);
13635 if (ctxt == NULL)
13636 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013637 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013638}
13639
13640/**
13641 * xmlReadFile:
13642 * @filename: a file or URL
13643 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013644 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013645 *
13646 * parse an XML file from the filesystem or the network.
13647 *
13648 * Returns the resulting document tree
13649 */
13650xmlDocPtr
13651xmlReadFile(const char *filename, const char *encoding, int options)
13652{
13653 xmlParserCtxtPtr ctxt;
13654
Daniel Veillard61b93382003-11-03 14:28:31 +000013655 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013656 if (ctxt == NULL)
13657 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013658 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013659}
13660
13661/**
13662 * xmlReadMemory:
13663 * @buffer: a pointer to a char array
13664 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013665 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013666 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013667 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013668 *
13669 * parse an XML in-memory document and build a tree.
13670 *
13671 * Returns the resulting document tree
13672 */
13673xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013674xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013675{
13676 xmlParserCtxtPtr ctxt;
13677
13678 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13679 if (ctxt == NULL)
13680 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013681 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013682}
13683
13684/**
13685 * xmlReadFd:
13686 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013687 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013688 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013689 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013690 *
13691 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013692 * NOTE that the file descriptor will not be closed when the
13693 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013694 *
13695 * Returns the resulting document tree
13696 */
13697xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013698xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013699{
13700 xmlParserCtxtPtr ctxt;
13701 xmlParserInputBufferPtr input;
13702 xmlParserInputPtr stream;
13703
13704 if (fd < 0)
13705 return (NULL);
13706
13707 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13708 if (input == NULL)
13709 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013710 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013711 ctxt = xmlNewParserCtxt();
13712 if (ctxt == NULL) {
13713 xmlFreeParserInputBuffer(input);
13714 return (NULL);
13715 }
13716 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13717 if (stream == NULL) {
13718 xmlFreeParserInputBuffer(input);
13719 xmlFreeParserCtxt(ctxt);
13720 return (NULL);
13721 }
13722 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013723 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013724}
13725
13726/**
13727 * xmlReadIO:
13728 * @ioread: an I/O read function
13729 * @ioclose: an I/O close function
13730 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013731 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013732 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013733 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013734 *
13735 * parse an XML document from I/O functions and source and build a tree.
13736 *
13737 * Returns the resulting document tree
13738 */
13739xmlDocPtr
13740xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013741 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013742{
13743 xmlParserCtxtPtr ctxt;
13744 xmlParserInputBufferPtr input;
13745 xmlParserInputPtr stream;
13746
13747 if (ioread == NULL)
13748 return (NULL);
13749
13750 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13751 XML_CHAR_ENCODING_NONE);
13752 if (input == NULL)
13753 return (NULL);
13754 ctxt = xmlNewParserCtxt();
13755 if (ctxt == NULL) {
13756 xmlFreeParserInputBuffer(input);
13757 return (NULL);
13758 }
13759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13760 if (stream == NULL) {
13761 xmlFreeParserInputBuffer(input);
13762 xmlFreeParserCtxt(ctxt);
13763 return (NULL);
13764 }
13765 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013766 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013767}
13768
13769/**
13770 * xmlCtxtReadDoc:
13771 * @ctxt: an XML parser context
13772 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013773 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013774 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013775 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013776 *
13777 * parse an XML in-memory document and build a tree.
13778 * This reuses the existing @ctxt parser context
13779 *
13780 * Returns the resulting document tree
13781 */
13782xmlDocPtr
13783xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013784 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013785{
13786 xmlParserInputPtr stream;
13787
13788 if (cur == NULL)
13789 return (NULL);
13790 if (ctxt == NULL)
13791 return (NULL);
13792
13793 xmlCtxtReset(ctxt);
13794
13795 stream = xmlNewStringInputStream(ctxt, cur);
13796 if (stream == NULL) {
13797 return (NULL);
13798 }
13799 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013800 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013801}
13802
13803/**
13804 * xmlCtxtReadFile:
13805 * @ctxt: an XML parser context
13806 * @filename: a file or URL
13807 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013808 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013809 *
13810 * parse an XML file from the filesystem or the network.
13811 * This reuses the existing @ctxt parser context
13812 *
13813 * Returns the resulting document tree
13814 */
13815xmlDocPtr
13816xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13817 const char *encoding, int options)
13818{
13819 xmlParserInputPtr stream;
13820
13821 if (filename == NULL)
13822 return (NULL);
13823 if (ctxt == NULL)
13824 return (NULL);
13825
13826 xmlCtxtReset(ctxt);
13827
Daniel Veillard29614c72004-11-26 10:47:26 +000013828 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013829 if (stream == NULL) {
13830 return (NULL);
13831 }
13832 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013833 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013834}
13835
13836/**
13837 * xmlCtxtReadMemory:
13838 * @ctxt: an XML parser context
13839 * @buffer: a pointer to a char array
13840 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013841 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013842 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013843 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013844 *
13845 * parse an XML in-memory document and build a tree.
13846 * This reuses the existing @ctxt parser context
13847 *
13848 * Returns the resulting document tree
13849 */
13850xmlDocPtr
13851xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013852 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013853{
13854 xmlParserInputBufferPtr input;
13855 xmlParserInputPtr stream;
13856
13857 if (ctxt == NULL)
13858 return (NULL);
13859 if (buffer == NULL)
13860 return (NULL);
13861
13862 xmlCtxtReset(ctxt);
13863
13864 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13865 if (input == NULL) {
13866 return(NULL);
13867 }
13868
13869 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13870 if (stream == NULL) {
13871 xmlFreeParserInputBuffer(input);
13872 return(NULL);
13873 }
13874
13875 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013876 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013877}
13878
13879/**
13880 * xmlCtxtReadFd:
13881 * @ctxt: an XML parser context
13882 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013883 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013884 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013885 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013886 *
13887 * parse an XML from a file descriptor and build a tree.
13888 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013889 * NOTE that the file descriptor will not be closed when the
13890 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013891 *
13892 * Returns the resulting document tree
13893 */
13894xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013895xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13896 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013897{
13898 xmlParserInputBufferPtr input;
13899 xmlParserInputPtr stream;
13900
13901 if (fd < 0)
13902 return (NULL);
13903 if (ctxt == NULL)
13904 return (NULL);
13905
13906 xmlCtxtReset(ctxt);
13907
13908
13909 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13910 if (input == NULL)
13911 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013912 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013913 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13914 if (stream == NULL) {
13915 xmlFreeParserInputBuffer(input);
13916 return (NULL);
13917 }
13918 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013919 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013920}
13921
13922/**
13923 * xmlCtxtReadIO:
13924 * @ctxt: an XML parser context
13925 * @ioread: an I/O read function
13926 * @ioclose: an I/O close function
13927 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013928 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013929 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013930 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931 *
13932 * parse an XML document from I/O functions and source and build a tree.
13933 * This reuses the existing @ctxt parser context
13934 *
13935 * Returns the resulting document tree
13936 */
13937xmlDocPtr
13938xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13939 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013940 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013941 const char *encoding, int options)
13942{
13943 xmlParserInputBufferPtr input;
13944 xmlParserInputPtr stream;
13945
13946 if (ioread == NULL)
13947 return (NULL);
13948 if (ctxt == NULL)
13949 return (NULL);
13950
13951 xmlCtxtReset(ctxt);
13952
13953 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13954 XML_CHAR_ENCODING_NONE);
13955 if (input == NULL)
13956 return (NULL);
13957 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13958 if (stream == NULL) {
13959 xmlFreeParserInputBuffer(input);
13960 return (NULL);
13961 }
13962 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013963 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013964}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013965
13966#define bottom_parser
13967#include "elfgcchack.h"