blob: 6861650562bc8fb17f5657b670b3a92b3959bd36 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents we accept
 * to process. It is a safety boundary, not a limitation of the parser
 * itself.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

/* Sizes of the parser working buffers (big and regular flavour). */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string used for documents built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL terminated and never modified, so both the strings
 * and the array of pointers are declared const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, otherwise zero.
 * Zero (0) is also returned when an unknown feature is requested.
 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
825/************************************************************************
826 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000827 * SAX2 defaulted attributes handling *
828 * *
829 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
Daniel Veillard6a31b832008-03-26 14:06:44 +0000981 /*
982 * Allows to detect attribute redefinitions
983 */
984 if (ctxt->attsSpecial != NULL) {
985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
986 return;
987 }
988
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991 if (ctxt->attsDefault == NULL)
992 goto mem_error;
993 }
994
995 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000996 * split the element name into prefix:localname , the string found
997 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000998 */
999 name = xmlSplitQName3(fullname, &len);
1000 if (name == NULL) {
1001 name = xmlDictLookup(ctxt->dict, fullname, -1);
1002 prefix = NULL;
1003 } else {
1004 name = xmlDictLookup(ctxt->dict, name, -1);
1005 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1006 }
1007
1008 /*
1009 * make sure there is some storage
1010 */
1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012 if (defaults == NULL) {
1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001014 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015 if (defaults == NULL)
1016 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001018 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001019 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1020 defaults, NULL) < 0) {
1021 xmlFree(defaults);
1022 goto mem_error;
1023 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001025 xmlDefAttrsPtr temp;
1026
1027 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001029 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001030 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001031 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001033 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1034 defaults, NULL) < 0) {
1035 xmlFree(defaults);
1036 goto mem_error;
1037 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001038 }
1039
1040 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001041 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001042 * are within the DTD and hen not associated to namespace names.
1043 */
1044 name = xmlSplitQName3(fullattr, &len);
1045 if (name == NULL) {
1046 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1047 prefix = NULL;
1048 } else {
1049 name = xmlDictLookup(ctxt->dict, name, -1);
1050 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1051 }
1052
1053 defaults->values[4 * defaults->nbAttrs] = name;
1054 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1055 /* intern the string and precompute the end */
1056 len = xmlStrlen(value);
1057 value = xmlDictLookup(ctxt->dict, value, len);
1058 defaults->values[4 * defaults->nbAttrs + 2] = value;
1059 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1060 defaults->nbAttrs++;
1061
1062 return;
1063
1064mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001066 return;
1067}
1068
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001069/**
1070 * xmlAddSpecialAttr:
1071 * @ctxt: an XML parser context
1072 * @fullname: the element fullname
1073 * @fullattr: the attribute fullname
1074 * @type: the attribute type
1075 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001076 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001077 */
1078static void
1079xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1080 const xmlChar *fullname,
1081 const xmlChar *fullattr,
1082 int type)
1083{
1084 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001085 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001086 if (ctxt->attsSpecial == NULL)
1087 goto mem_error;
1088 }
1089
Daniel Veillardac4118d2008-01-11 05:27:32 +00001090 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1091 return;
1092
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001093 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1094 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001095 return;
1096
1097mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001098 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001099 return;
1100}
1101
Daniel Veillard4432df22003-09-28 18:58:27 +00001102/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001103 * xmlCleanSpecialAttrCallback:
1104 *
1105 * Removes CDATA attributes from the special attribute table
1106 */
1107static void
1108xmlCleanSpecialAttrCallback(void *payload, void *data,
1109 const xmlChar *fullname, const xmlChar *fullattr,
1110 const xmlChar *unused ATTRIBUTE_UNUSED) {
1111 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1112
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001113 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001114 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1115 }
1116}
1117
1118/**
1119 * xmlCleanSpecialAttr:
1120 * @ctxt: an XML parser context
1121 *
1122 * Trim the list of attributes defined to remove all those of type
1123 * CDATA as they are not special. This call should be done when finishing
1124 * to parse the DTD and before starting to parse the document root.
1125 */
1126static void
1127xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1128{
1129 if (ctxt->attsSpecial == NULL)
1130 return;
1131
1132 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1133
1134 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1135 xmlHashFree(ctxt->attsSpecial, NULL);
1136 ctxt->attsSpecial = NULL;
1137 }
1138 return;
1139}
1140
1141/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001142 * xmlCheckLanguageID:
1143 * @lang: pointer to the string value
1144 *
1145 * Checks that the value conforms to the LanguageID production:
1146 *
1147 * NOTE: this is somewhat deprecated, those productions were removed from
1148 * the XML Second edition.
1149 *
1150 * [33] LanguageID ::= Langcode ('-' Subcode)*
1151 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1152 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1153 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1154 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1155 * [38] Subcode ::= ([a-z] | [A-Z])+
1156 *
1157 * Returns 1 if correct 0 otherwise
1158 **/
1159int
1160xmlCheckLanguageID(const xmlChar * lang)
1161{
1162 const xmlChar *cur = lang;
1163
1164 if (cur == NULL)
1165 return (0);
1166 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1167 ((cur[0] == 'I') && (cur[1] == '-'))) {
1168 /*
1169 * IANA code
1170 */
1171 cur += 2;
1172 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1173 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1174 cur++;
1175 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1176 ((cur[0] == 'X') && (cur[1] == '-'))) {
1177 /*
1178 * User code
1179 */
1180 cur += 2;
1181 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1182 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1183 cur++;
1184 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1185 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1186 /*
1187 * ISO639
1188 */
1189 cur++;
1190 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1191 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1192 cur++;
1193 else
1194 return (0);
1195 } else
1196 return (0);
1197 while (cur[0] != 0) { /* non input consuming */
1198 if (cur[0] != '-')
1199 return (0);
1200 cur++;
1201 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1202 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1203 cur++;
1204 else
1205 return (0);
1206 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1207 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1208 cur++;
1209 }
1210 return (1);
1211}
1212
Owen Taylor3473f882001-02-23 17:55:21 +00001213/************************************************************************
1214 * *
1215 * Parser stacks related functions and macros *
1216 * *
1217 ************************************************************************/
1218
1219xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1220 const xmlChar ** str);
1221
Daniel Veillard0fb18932003-09-07 09:14:37 +00001222#ifdef SAX2
1223/**
1224 * nsPush:
1225 * @ctxt: an XML parser context
1226 * @prefix: the namespace prefix or NULL
1227 * @URL: the namespace name
1228 *
1229 * Pushes a new parser namespace on top of the ns stack
1230 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001231 * Returns -1 in case of error, -2 if the namespace should be discarded
1232 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001233 */
1234static int
1235nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1236{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001237 if (ctxt->options & XML_PARSE_NSCLEAN) {
1238 int i;
1239 for (i = 0;i < ctxt->nsNr;i += 2) {
1240 if (ctxt->nsTab[i] == prefix) {
1241 /* in scope */
1242 if (ctxt->nsTab[i + 1] == URL)
1243 return(-2);
1244 /* out of scope keep it */
1245 break;
1246 }
1247 }
1248 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001249 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1250 ctxt->nsMax = 10;
1251 ctxt->nsNr = 0;
1252 ctxt->nsTab = (const xmlChar **)
1253 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1254 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001256 ctxt->nsMax = 0;
1257 return (-1);
1258 }
1259 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001260 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001261 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001262 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1263 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1264 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001265 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001266 ctxt->nsMax /= 2;
1267 return (-1);
1268 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001269 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001270 }
1271 ctxt->nsTab[ctxt->nsNr++] = prefix;
1272 ctxt->nsTab[ctxt->nsNr++] = URL;
1273 return (ctxt->nsNr);
1274}
1275/**
1276 * nsPop:
1277 * @ctxt: an XML parser context
1278 * @nr: the number to pop
1279 *
1280 * Pops the top @nr parser prefix/namespace from the ns stack
1281 *
1282 * Returns the number of namespaces removed
1283 */
1284static int
1285nsPop(xmlParserCtxtPtr ctxt, int nr)
1286{
1287 int i;
1288
1289 if (ctxt->nsTab == NULL) return(0);
1290 if (ctxt->nsNr < nr) {
1291 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1292 nr = ctxt->nsNr;
1293 }
1294 if (ctxt->nsNr <= 0)
1295 return (0);
1296
1297 for (i = 0;i < nr;i++) {
1298 ctxt->nsNr--;
1299 ctxt->nsTab[ctxt->nsNr] = NULL;
1300 }
1301 return(nr);
1302}
1303#endif
1304
1305static int
1306xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1307 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001309 int maxatts;
1310
1311 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001312 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001313 atts = (const xmlChar **)
1314 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001315 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001316 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001317 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1318 if (attallocs == NULL) goto mem_error;
1319 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001320 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001321 } else if (nr + 5 > ctxt->maxatts) {
1322 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001323 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1324 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001325 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001326 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001327 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1328 (maxatts / 5) * sizeof(int));
1329 if (attallocs == NULL) goto mem_error;
1330 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001331 ctxt->maxatts = maxatts;
1332 }
1333 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001334mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001335 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001336 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001337}
1338
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001339/**
1340 * inputPush:
1341 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001342 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001343 *
1344 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001345 *
1346 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001349inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1350{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001351 if ((ctxt == NULL) || (value == NULL))
1352 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->inputNr >= ctxt->inputMax) {
1354 ctxt->inputMax *= 2;
1355 ctxt->inputTab =
1356 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1357 ctxt->inputMax *
1358 sizeof(ctxt->inputTab[0]));
1359 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001360 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001361 return (0);
1362 }
1363 }
1364 ctxt->inputTab[ctxt->inputNr] = value;
1365 ctxt->input = value;
1366 return (ctxt->inputNr++);
1367}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001368/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001369 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001370 * @ctxt: an XML parser context
1371 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001373 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001374 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001375 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001376xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001377inputPop(xmlParserCtxtPtr ctxt)
1378{
1379 xmlParserInputPtr ret;
1380
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001381 if (ctxt == NULL)
1382 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001384 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 ctxt->inputNr--;
1386 if (ctxt->inputNr > 0)
1387 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1388 else
1389 ctxt->input = NULL;
1390 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001391 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001392 return (ret);
1393}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001394/**
1395 * nodePush:
1396 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001397 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001398 *
1399 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001400 *
1401 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001402 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001403int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001404nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1405{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001406 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001407 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001408 xmlNodePtr *tmp;
1409
1410 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1411 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001412 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001413 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001414 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001415 return (0);
1416 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001417 ctxt->nodeTab = tmp;
1418 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001420 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001421 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001422 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1423 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001424 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001425 return(0);
1426 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001427 ctxt->nodeTab[ctxt->nodeNr] = value;
1428 ctxt->node = value;
1429 return (ctxt->nodeNr++);
1430}
1431/**
1432 * nodePop:
1433 * @ctxt: an XML parser context
1434 *
1435 * Pops the top element node from the node stack
1436 *
1437 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001438 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001439xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001440nodePop(xmlParserCtxtPtr ctxt)
1441{
1442 xmlNodePtr ret;
1443
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001444 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001446 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001447 ctxt->nodeNr--;
1448 if (ctxt->nodeNr > 0)
1449 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1450 else
1451 ctxt->node = NULL;
1452 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001453 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001454 return (ret);
1455}
Daniel Veillarda2351322004-06-27 12:08:10 +00001456
1457#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001458/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001459 * nameNsPush:
1460 * @ctxt: an XML parser context
1461 * @value: the element name
1462 * @prefix: the element prefix
1463 * @URI: the element namespace name
1464 *
1465 * Pushes a new element name/prefix/URL on top of the name stack
1466 *
1467 * Returns -1 in case of error, the index in the stack otherwise
1468 */
1469static int
1470nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1471 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1472{
1473 if (ctxt->nameNr >= ctxt->nameMax) {
1474 const xmlChar * *tmp;
1475 void **tmp2;
1476 ctxt->nameMax *= 2;
1477 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1478 ctxt->nameMax *
1479 sizeof(ctxt->nameTab[0]));
1480 if (tmp == NULL) {
1481 ctxt->nameMax /= 2;
1482 goto mem_error;
1483 }
1484 ctxt->nameTab = tmp;
1485 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1486 ctxt->nameMax * 3 *
1487 sizeof(ctxt->pushTab[0]));
1488 if (tmp2 == NULL) {
1489 ctxt->nameMax /= 2;
1490 goto mem_error;
1491 }
1492 ctxt->pushTab = tmp2;
1493 }
1494 ctxt->nameTab[ctxt->nameNr] = value;
1495 ctxt->name = value;
1496 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1497 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001498 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001499 return (ctxt->nameNr++);
1500mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001501 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001502 return (-1);
1503}
1504/**
1505 * nameNsPop:
1506 * @ctxt: an XML parser context
1507 *
1508 * Pops the top element/prefix/URI name from the name stack
1509 *
1510 * Returns the name just removed
1511 */
1512static const xmlChar *
1513nameNsPop(xmlParserCtxtPtr ctxt)
1514{
1515 const xmlChar *ret;
1516
1517 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001518 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001519 ctxt->nameNr--;
1520 if (ctxt->nameNr > 0)
1521 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1522 else
1523 ctxt->name = NULL;
1524 ret = ctxt->nameTab[ctxt->nameNr];
1525 ctxt->nameTab[ctxt->nameNr] = NULL;
1526 return (ret);
1527}
Daniel Veillarda2351322004-06-27 12:08:10 +00001528#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001529
1530/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001531 * namePush:
1532 * @ctxt: an XML parser context
1533 * @value: the element name
1534 *
1535 * Pushes a new element name on top of the name stack
1536 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001537 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001539int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001540namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001542 if (ctxt == NULL) return (-1);
1543
Daniel Veillard1c732d22002-11-30 11:22:59 +00001544 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001545 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001546 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001547 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 ctxt->nameMax *
1549 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001550 if (tmp == NULL) {
1551 ctxt->nameMax /= 2;
1552 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001554 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001555 }
1556 ctxt->nameTab[ctxt->nameNr] = value;
1557 ctxt->name = value;
1558 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001559mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001560 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001561 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562}
1563/**
1564 * namePop:
1565 * @ctxt: an XML parser context
1566 *
1567 * Pops the top element name from the name stack
1568 *
1569 * Returns the name just removed
1570 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001571const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001572namePop(xmlParserCtxtPtr ctxt)
1573{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001574 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001575
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001576 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nameNr--;
1579 if (ctxt->nameNr > 0)
1580 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1581 else
1582 ctxt->name = NULL;
1583 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Owen Taylor3473f882001-02-23 17:55:21 +00001587
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001588static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001589 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001590 int *tmp;
1591
Owen Taylor3473f882001-02-23 17:55:21 +00001592 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001593 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1594 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1595 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001596 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001597 return(0);
1598 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001599 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 ctxt->spaceTab[ctxt->spaceNr] = val;
1602 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1603 return(ctxt->spaceNr++);
1604}
1605
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001606static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001607 int ret;
1608 if (ctxt->spaceNr <= 0) return(0);
1609 ctxt->spaceNr--;
1610 if (ctxt->spaceNr > 0)
1611 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1612 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001613 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001614 ret = ctxt->spaceTab[ctxt->spaceNr];
1615 ctxt->spaceTab[ctxt->spaceNr] = -1;
1616 return(ret);
1617}
1618
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar like SKIP, but checks each skipped xmlChar for
 *           a newline to keep the line and column counters up to date.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1653
/*
 * Direct accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * exactly c1 ... cn. The bytes are compared one by one from left to
 * right and the comparison stops at the first mismatch, so bytes after a
 * mismatching one are never read. s is expanded once per compared byte
 * and therefore must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1676
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * both the character count and the column counter. Afterwards a '%' at
 * the new position is handed to xmlParserHandlePEReference(), and when
 * the position holds a 0 and no more data can be read the input is
 * popped. val is expanded several times and must not have side effects.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * a skipped newline increments the line counter and resets the column
 * to 1. val is re-evaluated on every iteration and must not have side
 * effects.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1699
Daniel Veillarda880b122003-04-21 21:36:41 +00001700#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001701 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1702 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001703 xmlSHRINK (ctxt);
1704
1705static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1706 xmlParserInputShrink(ctxt->input);
1707 if ((*ctxt->input->cur == 0) &&
1708 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1709 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001710 }
Owen Taylor3473f882001-02-23 17:55:21 +00001711
Daniel Veillarda880b122003-04-21 21:36:41 +00001712#define GROW if ((ctxt->progressive == 0) && \
1713 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001714 xmlGROW (ctxt);
1715
1716static void xmlGROW (xmlParserCtxtPtr ctxt) {
1717 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1718 if ((*ctxt->input->cur == 0) &&
1719 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1720 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001721}
Owen Taylor3473f882001-02-23 17:55:21 +00001722
/* Skip the blanks found at the current position of the input */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character of the input */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting one column and one
 * character, and ask for more input when the new position holds a 0.
 * No newline accounting is done.
 */
#define NEXT1 { \
    ctxt->input->col++; \
    ctxt->input->cur++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long. A
 * newline increments the line counter and resets the column to 1, any
 * other character counts for one column. A '%' at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* Decode the current character; l receives its length in xmlChars */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of length 1 is stored directly, anything else
 * goes through xmlCopyCharMultiByte(). The expansion is an if/else
 * statement without its final semicolon.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001749
1750/**
1751 * xmlSkipBlankChars:
1752 * @ctxt: the XML parser context
1753 *
1754 * skip all blanks character found at that point in the input streams.
1755 * It pops up finished entities in the process if allowable at that point.
1756 *
1757 * Returns the number of space chars skipped
1758 */
1759
1760int
1761xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001762 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001763
1764 /*
1765 * It's Okay to use CUR/NEXT here since all the blanks are on
1766 * the ASCII range.
1767 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001768 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1769 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001770 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001771 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001772 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001773 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001774 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001775 if (*cur == '\n') {
1776 ctxt->input->line++; ctxt->input->col = 1;
1777 }
1778 cur++;
1779 res++;
1780 if (*cur == 0) {
1781 ctxt->input->cur = cur;
1782 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1783 cur = ctxt->input->cur;
1784 }
1785 }
1786 ctxt->input->cur = cur;
1787 } else {
1788 int cur;
1789 do {
1790 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001791 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001792 NEXT;
1793 cur = CUR;
1794 res++;
1795 }
1796 while ((cur == 0) && (ctxt->inputNr > 1) &&
1797 (ctxt->instate != XML_PARSER_COMMENT)) {
1798 xmlPopInput(ctxt);
1799 cur = CUR;
1800 }
1801 /*
1802 * Need to handle support of entities branching here
1803 */
1804 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1805 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1806 }
Owen Taylor3473f882001-02-23 17:55:21 +00001807 return(res);
1808}
1809
1810/************************************************************************
1811 * *
1812 * Commodity functions to handle entities *
1813 * *
1814 ************************************************************************/
1815
1816/**
1817 * xmlPopInput:
1818 * @ctxt: an XML parser context
1819 *
1820 * xmlPopInput: the current input pointed by ctxt->input came to an end
1821 * pop it and return the next char.
1822 *
1823 * Returns the current xmlChar in the parser context
1824 */
1825xmlChar
1826xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001827 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (xmlParserDebugEntities)
1829 xmlGenericError(xmlGenericErrorContext,
1830 "Popping input %d\n", ctxt->inputNr);
1831 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001832 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001833 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1834 return(xmlPopInput(ctxt));
1835 return(CUR);
1836}
1837
1838/**
1839 * xmlPushInput:
1840 * @ctxt: an XML parser context
1841 * @input: an XML parser input fragment (entity, XML fragment ...).
1842 *
1843 * xmlPushInput: switch to a new input stream which is stacked on top
1844 * of the previous one(s).
1845 */
1846void
1847xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1848 if (input == NULL) return;
1849
1850 if (xmlParserDebugEntities) {
1851 if ((ctxt->input != NULL) && (ctxt->input->filename))
1852 xmlGenericError(xmlGenericErrorContext,
1853 "%s(%d): ", ctxt->input->filename,
1854 ctxt->input->line);
1855 xmlGenericError(xmlGenericErrorContext,
1856 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1857 }
1858 inputPush(ctxt, input);
1859 GROW;
1860}
1861
1862/**
1863 * xmlParseCharRef:
1864 * @ctxt: an XML parser context
1865 *
1866 * parse Reference declarations
1867 *
1868 * [66] CharRef ::= '&#' [0-9]+ ';' |
1869 * '&#x' [0-9a-fA-F]+ ';'
1870 *
1871 * [ WFC: Legal Character ]
1872 * Characters referred to using character references must match the
1873 * production for Char.
1874 *
1875 * Returns the value parsed (as an int), 0 in case of error
1876 */
1877int
1878xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001879 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001881 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001882
Owen Taylor3473f882001-02-23 17:55:21 +00001883 /*
1884 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1885 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001886 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001887 (NXT(2) == 'x')) {
1888 SKIP(3);
1889 GROW;
1890 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001891 if (count++ > 20) {
1892 count = 0;
1893 GROW;
1894 }
1895 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001896 val = val * 16 + (CUR - '0');
1897 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1898 val = val * 16 + (CUR - 'a') + 10;
1899 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1900 val = val * 16 + (CUR - 'A') + 10;
1901 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001902 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 val = 0;
1904 break;
1905 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001906 if (val > 0x10FFFF)
1907 outofrange = val;
1908
Owen Taylor3473f882001-02-23 17:55:21 +00001909 NEXT;
1910 count++;
1911 }
1912 if (RAW == ';') {
1913 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001914 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001915 ctxt->nbChars ++;
1916 ctxt->input->cur++;
1917 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001918 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001919 SKIP(2);
1920 GROW;
1921 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001922 if (count++ > 20) {
1923 count = 0;
1924 GROW;
1925 }
1926 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001927 val = val * 10 + (CUR - '0');
1928 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001929 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001930 val = 0;
1931 break;
1932 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001933 if (val > 0x10FFFF)
1934 outofrange = val;
1935
Owen Taylor3473f882001-02-23 17:55:21 +00001936 NEXT;
1937 count++;
1938 }
1939 if (RAW == ';') {
1940 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001941 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001942 ctxt->nbChars ++;
1943 ctxt->input->cur++;
1944 }
1945 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 }
1948
1949 /*
1950 * [ WFC: Legal Character ]
1951 * Characters referred to using character references must match the
1952 * production for Char.
1953 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001954 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001955 return(val);
1956 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001957 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1958 "xmlParseCharRef: invalid xmlChar value %d\n",
1959 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 }
1961 return(0);
1962}
1963
1964/**
1965 * xmlParseStringCharRef:
1966 * @ctxt: an XML parser context
1967 * @str: a pointer to an index in the string
1968 *
1969 * parse Reference declarations, variant parsing from a string rather
1970 * than an an input flow.
1971 *
1972 * [66] CharRef ::= '&#' [0-9]+ ';' |
1973 * '&#x' [0-9a-fA-F]+ ';'
1974 *
1975 * [ WFC: Legal Character ]
1976 * Characters referred to using character references must match the
1977 * production for Char.
1978 *
1979 * Returns the value parsed (as an int), 0 in case of error, str will be
1980 * updated to the current value of the index
1981 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001982static int
Owen Taylor3473f882001-02-23 17:55:21 +00001983xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1984 const xmlChar *ptr;
1985 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001986 unsigned int val = 0;
1987 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001988
1989 if ((str == NULL) || (*str == NULL)) return(0);
1990 ptr = *str;
1991 cur = *ptr;
1992 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1993 ptr += 3;
1994 cur = *ptr;
1995 while (cur != ';') { /* Non input consuming loop */
1996 if ((cur >= '0') && (cur <= '9'))
1997 val = val * 16 + (cur - '0');
1998 else if ((cur >= 'a') && (cur <= 'f'))
1999 val = val * 16 + (cur - 'a') + 10;
2000 else if ((cur >= 'A') && (cur <= 'F'))
2001 val = val * 16 + (cur - 'A') + 10;
2002 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002003 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002004 val = 0;
2005 break;
2006 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002007 if (val > 0x10FFFF)
2008 outofrange = val;
2009
Owen Taylor3473f882001-02-23 17:55:21 +00002010 ptr++;
2011 cur = *ptr;
2012 }
2013 if (cur == ';')
2014 ptr++;
2015 } else if ((cur == '&') && (ptr[1] == '#')){
2016 ptr += 2;
2017 cur = *ptr;
2018 while (cur != ';') { /* Non input consuming loops */
2019 if ((cur >= '0') && (cur <= '9'))
2020 val = val * 10 + (cur - '0');
2021 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002022 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002023 val = 0;
2024 break;
2025 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002026 if (val > 0x10FFFF)
2027 outofrange = val;
2028
Owen Taylor3473f882001-02-23 17:55:21 +00002029 ptr++;
2030 cur = *ptr;
2031 }
2032 if (cur == ';')
2033 ptr++;
2034 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002035 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002036 return(0);
2037 }
2038 *str = ptr;
2039
2040 /*
2041 * [ WFC: Legal Character ]
2042 * Characters referred to using character references must match the
2043 * production for Char.
2044 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002045 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002046 return(val);
2047 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002048 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2049 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2050 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002051 }
2052 return(0);
2053}
2054
2055/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002056 * xmlNewBlanksWrapperInputStream:
2057 * @ctxt: an XML parser context
2058 * @entity: an Entity pointer
2059 *
2060 * Create a new input stream for wrapping
2061 * blanks around a PEReference
2062 *
2063 * Returns the new input stream or NULL
2064 */
2065
2066static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2067
Daniel Veillardf4862f02002-09-10 11:13:43 +00002068static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002069xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2070 xmlParserInputPtr input;
2071 xmlChar *buffer;
2072 size_t length;
2073 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2075 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002076 return(NULL);
2077 }
2078 if (xmlParserDebugEntities)
2079 xmlGenericError(xmlGenericErrorContext,
2080 "new blanks wrapper for entity: %s\n", entity->name);
2081 input = xmlNewInputStream(ctxt);
2082 if (input == NULL) {
2083 return(NULL);
2084 }
2085 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002086 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002087 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002088 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002089 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002090 return(NULL);
2091 }
2092 buffer [0] = ' ';
2093 buffer [1] = '%';
2094 buffer [length-3] = ';';
2095 buffer [length-2] = ' ';
2096 buffer [length-1] = 0;
2097 memcpy(buffer + 2, entity->name, length - 5);
2098 input->free = deallocblankswrapper;
2099 input->base = buffer;
2100 input->cur = buffer;
2101 input->length = length;
2102 input->end = &buffer[length];
2103 return(input);
2104}
2105
2106/**
Owen Taylor3473f882001-02-23 17:55:21 +00002107 * xmlParserHandlePEReference:
2108 * @ctxt: the parser context
2109 *
2110 * [69] PEReference ::= '%' Name ';'
2111 *
2112 * [ WFC: No Recursion ]
2113 * A parsed entity must not contain a recursive
2114 * reference to itself, either directly or indirectly.
2115 *
2116 * [ WFC: Entity Declared ]
2117 * In a document without any DTD, a document with only an internal DTD
2118 * subset which contains no parameter entity references, or a document
2119 * with "standalone='yes'", ... ... The declaration of a parameter
2120 * entity must precede any reference to it...
2121 *
2122 * [ VC: Entity Declared ]
2123 * In a document with an external subset or external parameter entities
2124 * with "standalone='no'", ... ... The declaration of a parameter entity
2125 * must precede any reference to it...
2126 *
2127 * [ WFC: In DTD ]
2128 * Parameter-entity references may only appear in the DTD.
2129 * NOTE: misleading but this is handled.
2130 *
2131 * A PEReference may have been detected in the current input stream
2132 * the handling is done accordingly to
2133 * http://www.w3.org/TR/REC-xml#entproc
2134 * i.e.
2135 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002136 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002137 */
2138void
2139xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002140 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002141 xmlEntityPtr entity = NULL;
2142 xmlParserInputPtr input;
2143
Owen Taylor3473f882001-02-23 17:55:21 +00002144 if (RAW != '%') return;
2145 switch(ctxt->instate) {
2146 case XML_PARSER_CDATA_SECTION:
2147 return;
2148 case XML_PARSER_COMMENT:
2149 return;
2150 case XML_PARSER_START_TAG:
2151 return;
2152 case XML_PARSER_END_TAG:
2153 return;
2154 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002155 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002156 return;
2157 case XML_PARSER_PROLOG:
2158 case XML_PARSER_START:
2159 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002160 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002161 return;
2162 case XML_PARSER_ENTITY_DECL:
2163 case XML_PARSER_CONTENT:
2164 case XML_PARSER_ATTRIBUTE_VALUE:
2165 case XML_PARSER_PI:
2166 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002167 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002168 /* we just ignore it there */
2169 return;
2170 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002171 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002172 return;
2173 case XML_PARSER_ENTITY_VALUE:
2174 /*
2175 * NOTE: in the case of entity values, we don't do the
2176 * substitution here since we need the literal
2177 * entity value to be able to save the internal
2178 * subset of the document.
2179 * This will be handled by xmlStringDecodeEntities
2180 */
2181 return;
2182 case XML_PARSER_DTD:
2183 /*
2184 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2185 * In the internal DTD subset, parameter-entity references
2186 * can occur only where markup declarations can occur, not
2187 * within markup declarations.
2188 * In that case this is handled in xmlParseMarkupDecl
2189 */
2190 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2191 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002192 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002194 break;
2195 case XML_PARSER_IGNORE:
2196 return;
2197 }
2198
2199 NEXT;
2200 name = xmlParseName(ctxt);
2201 if (xmlParserDebugEntities)
2202 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002203 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002204 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002205 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002206 } else {
2207 if (RAW == ';') {
2208 NEXT;
2209 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2210 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2211 if (entity == NULL) {
2212
2213 /*
2214 * [ WFC: Entity Declared ]
2215 * In a document without any DTD, a document with only an
2216 * internal DTD subset which contains no parameter entity
2217 * references, or a document with "standalone='yes'", ...
2218 * ... The declaration of a parameter entity must precede
2219 * any reference to it...
2220 */
2221 if ((ctxt->standalone == 1) ||
2222 ((ctxt->hasExternalSubset == 0) &&
2223 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002225 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002226 } else {
2227 /*
2228 * [ VC: Entity Declared ]
2229 * In a document with an external subset or external
2230 * parameter entities with "standalone='no'", ...
2231 * ... The declaration of a parameter entity must precede
2232 * any reference to it...
2233 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002234 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2235 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2236 "PEReference: %%%s; not found\n",
2237 name);
2238 } else
2239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2240 "PEReference: %%%s; not found\n",
2241 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 ctxt->valid = 0;
2243 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002244 } else if (ctxt->input->free != deallocblankswrapper) {
2245 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2246 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else {
2248 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2249 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002250 xmlChar start[4];
2251 xmlCharEncoding enc;
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 /*
2254 * handle the extra spaces added before and after
2255 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002256 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002257 */
2258 input = xmlNewEntityInputStream(ctxt, entity);
2259 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002260
2261 /*
2262 * Get the 4 first bytes and decode the charset
2263 * if enc != XML_CHAR_ENCODING_NONE
2264 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002265 * Note that, since we may have some non-UTF8
2266 * encoding (like UTF16, bug 135229), the 'length'
2267 * is not known, but we can calculate based upon
2268 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002269 */
2270 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002271 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002272 start[0] = RAW;
2273 start[1] = NXT(1);
2274 start[2] = NXT(2);
2275 start[3] = NXT(3);
2276 enc = xmlDetectCharEncoding(start, 4);
2277 if (enc != XML_CHAR_ENCODING_NONE) {
2278 xmlSwitchEncoding(ctxt, enc);
2279 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002280 }
2281
Owen Taylor3473f882001-02-23 17:55:21 +00002282 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002283 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2284 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002285 xmlParseTextDecl(ctxt);
2286 }
Owen Taylor3473f882001-02-23 17:55:21 +00002287 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002288 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2289 "PEReference: %s is not a parameter entity\n",
2290 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
2292 }
2293 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002294 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002295 }
Owen Taylor3473f882001-02-23 17:55:21 +00002296 }
2297}
2298
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> to hold that many xmlChar.
 * The expansion site must provide a variable named <buffer>_size and a
 * `mem_error` label: when xmlRealloc returns NULL the macro jumps there
 * without assigning to <buffer> (note that <buffer>_size has already been
 * doubled at that point). The expansion is a plain brace block, not a
 * do/while(0) statement.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2310
2311/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002312 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002313 * @ctxt: the parser context
2314 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002315 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002316 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2317 * @end: an end marker xmlChar, 0 if none
2318 * @end2: an end marker xmlChar, 0 if none
2319 * @end3: an end marker xmlChar, 0 if none
2320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002321 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002322 *
2323 * [67] Reference ::= EntityRef | CharRef
2324 *
2325 * [69] PEReference ::= '%' Name ';'
2326 *
2327 * Returns A newly allocated string with the substitution done. The caller
2328 * must deallocate it !
2329 */
2330xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002331xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2332 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002333 xmlChar *buffer = NULL;
2334 int buffer_size = 0;
2335
2336 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002337 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002338 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 xmlEntityPtr ent;
2340 int c,l;
2341 int nbchars = 0;
2342
Daniel Veillarda82b1822004-11-08 16:24:57 +00002343 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002344 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002346
2347 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002348 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002349 return(NULL);
2350 }
2351
2352 /*
2353 * allocate a translation buffer.
2354 */
2355 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002356 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002357 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002358
2359 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002360 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002361 * we are operating on already parsed values.
2362 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002363 if (str < last)
2364 c = CUR_SCHAR(str, l);
2365 else
2366 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002367 while ((c != 0) && (c != end) && /* non input consuming loop */
2368 (c != end2) && (c != end3)) {
2369
2370 if (c == 0) break;
2371 if ((c == '&') && (str[1] == '#')) {
2372 int val = xmlParseStringCharRef(ctxt, &str);
2373 if (val != 0) {
2374 COPY_BUF(0,buffer,nbchars,val);
2375 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002376 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2377 growBuffer(buffer);
2378 }
Owen Taylor3473f882001-02-23 17:55:21 +00002379 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2380 if (xmlParserDebugEntities)
2381 xmlGenericError(xmlGenericErrorContext,
2382 "String decoding Entity Reference: %.30s\n",
2383 str);
2384 ent = xmlParseStringEntityRef(ctxt, &str);
2385 if ((ent != NULL) &&
2386 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2387 if (ent->content != NULL) {
2388 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002389 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2390 growBuffer(buffer);
2391 }
Owen Taylor3473f882001-02-23 17:55:21 +00002392 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002393 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2394 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002395 }
2396 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002397 ctxt->depth++;
2398 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2399 0, 0, 0);
2400 ctxt->depth--;
2401 if (rep != NULL) {
2402 current = rep;
2403 while (*current != 0) { /* non input consuming loop */
2404 buffer[nbchars++] = *current++;
2405 if (nbchars >
2406 buffer_size - XML_PARSER_BUFFER_SIZE) {
2407 growBuffer(buffer);
2408 }
2409 }
2410 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002411 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002412 }
2413 } else if (ent != NULL) {
2414 int i = xmlStrlen(ent->name);
2415 const xmlChar *cur = ent->name;
2416
2417 buffer[nbchars++] = '&';
2418 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2419 growBuffer(buffer);
2420 }
2421 for (;i > 0;i--)
2422 buffer[nbchars++] = *cur++;
2423 buffer[nbchars++] = ';';
2424 }
2425 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2426 if (xmlParserDebugEntities)
2427 xmlGenericError(xmlGenericErrorContext,
2428 "String decoding PE Reference: %.30s\n", str);
2429 ent = xmlParseStringPEReference(ctxt, &str);
2430 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002431 if (ent->content == NULL) {
2432 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2433 }
2434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 ctxt->depth++;
2436 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2437 0, 0, 0);
2438 ctxt->depth--;
2439 if (rep != NULL) {
2440 current = rep;
2441 while (*current != 0) { /* non input consuming loop */
2442 buffer[nbchars++] = *current++;
2443 if (nbchars >
2444 buffer_size - XML_PARSER_BUFFER_SIZE) {
2445 growBuffer(buffer);
2446 }
2447 }
2448 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002449 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002450 }
2451 }
2452 } else {
2453 COPY_BUF(l,buffer,nbchars,c);
2454 str += l;
2455 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2456 growBuffer(buffer);
2457 }
2458 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002459 if (str < last)
2460 c = CUR_SCHAR(str, l);
2461 else
2462 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002463 }
2464 buffer[nbchars++] = 0;
2465 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002466
2467mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002468 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002469 if (rep != NULL)
2470 xmlFree(rep);
2471 if (buffer != NULL)
2472 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002473 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002474}
2475
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476/**
2477 * xmlStringDecodeEntities:
2478 * @ctxt: the parser context
2479 * @str: the input string
2480 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2481 * @end: an end marker xmlChar, 0 if none
2482 * @end2: an end marker xmlChar, 0 if none
2483 * @end3: an end marker xmlChar, 0 if none
2484 *
2485 * Takes a entity string content and process to do the adequate substitutions.
2486 *
2487 * [67] Reference ::= EntityRef | CharRef
2488 *
2489 * [69] PEReference ::= '%' Name ';'
2490 *
2491 * Returns A newly allocated string with the substitution done. The caller
2492 * must deallocate it !
2493 */
2494xmlChar *
2495xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2496 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002497 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002498 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2499 end, end2, end3));
2500}
Owen Taylor3473f882001-02-23 17:55:21 +00002501
2502/************************************************************************
2503 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002504 * Commodity functions, cleanup needed ? *
2505 * *
2506 ************************************************************************/
2507
2508/**
2509 * areBlanks:
2510 * @ctxt: an XML parser context
2511 * @str: a xmlChar *
2512 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002513 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002514 *
2515 * Is this a sequence of blank chars that one can ignore ?
2516 *
2517 * Returns 1 if ignorable 0 otherwise.
2518 */
2519
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002520static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2521 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002522 int i, ret;
2523 xmlNodePtr lastChild;
2524
Daniel Veillard05c13a22001-09-09 08:38:09 +00002525 /*
2526 * Don't spend time trying to differentiate them, the same callback is
2527 * used !
2528 */
2529 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002530 return(0);
2531
Owen Taylor3473f882001-02-23 17:55:21 +00002532 /*
2533 * Check for xml:space value.
2534 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002535 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2536 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002537 return(0);
2538
2539 /*
2540 * Check that the string is made of blanks
2541 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002542 if (blank_chars == 0) {
2543 for (i = 0;i < len;i++)
2544 if (!(IS_BLANK_CH(str[i]))) return(0);
2545 }
Owen Taylor3473f882001-02-23 17:55:21 +00002546
2547 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002548 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002549 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002550 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 if (ctxt->myDoc != NULL) {
2552 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2553 if (ret == 0) return(1);
2554 if (ret == 1) return(0);
2555 }
2556
2557 /*
2558 * Otherwise, heuristic :-\
2559 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002560 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002561 if ((ctxt->node->children == NULL) &&
2562 (RAW == '<') && (NXT(1) == '/')) return(0);
2563
2564 lastChild = xmlGetLastChild(ctxt->node);
2565 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002566 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2567 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else if (xmlNodeIsText(lastChild))
2569 return(0);
2570 else if ((ctxt->node->children != NULL) &&
2571 (xmlNodeIsText(ctxt->node->children)))
2572 return(0);
2573 return(1);
2574}
2575
Owen Taylor3473f882001-02-23 17:55:21 +00002576/************************************************************************
2577 * *
2578 * Extra stuff for namespace support *
2579 * Relates to http://www.w3.org/TR/WD-xml-names *
2580 * *
2581 ************************************************************************/
2582
2583/**
2584 * xmlSplitQName:
2585 * @ctxt: an XML parser context
2586 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002588 *
2589 * parse an UTF8 encoded XML qualified name string
2590 *
2591 * [NS 5] QName ::= (Prefix ':')? LocalPart
2592 *
2593 * [NS 6] Prefix ::= NCName
2594 *
2595 * [NS 7] LocalPart ::= NCName
2596 *
2597 * Returns the local part, and prefix is updated
2598 * to get the Prefix if any.
2599 */
2600
2601xmlChar *
2602xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2603 xmlChar buf[XML_MAX_NAMELEN + 5];
2604 xmlChar *buffer = NULL;
2605 int len = 0;
2606 int max = XML_MAX_NAMELEN;
2607 xmlChar *ret = NULL;
2608 const xmlChar *cur = name;
2609 int c;
2610
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002611 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 *prefix = NULL;
2613
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002614 if (cur == NULL) return(NULL);
2615
Owen Taylor3473f882001-02-23 17:55:21 +00002616#ifndef XML_XML_NAMESPACE
2617 /* xml: prefix is not really a namespace */
2618 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2619 (cur[2] == 'l') && (cur[3] == ':'))
2620 return(xmlStrdup(name));
2621#endif
2622
Daniel Veillard597bc482003-07-24 16:08:28 +00002623 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002624 if (cur[0] == ':')
2625 return(xmlStrdup(name));
2626
2627 c = *cur++;
2628 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2629 buf[len++] = c;
2630 c = *cur++;
2631 }
2632 if (len >= max) {
2633 /*
2634 * Okay someone managed to make a huge name, so he's ready to pay
2635 * for the processing speed.
2636 */
2637 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002638
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002639 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002640 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002641 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002642 return(NULL);
2643 }
2644 memcpy(buffer, buf, len);
2645 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2646 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002647 xmlChar *tmp;
2648
Owen Taylor3473f882001-02-23 17:55:21 +00002649 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002650 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002651 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002652 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002653 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002654 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002655 return(NULL);
2656 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002657 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002658 }
2659 buffer[len++] = c;
2660 c = *cur++;
2661 }
2662 buffer[len] = 0;
2663 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002664
Daniel Veillard597bc482003-07-24 16:08:28 +00002665 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002666 if (buffer != NULL)
2667 xmlFree(buffer);
2668 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002669 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002670 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002671
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buffer == NULL)
2673 ret = xmlStrndup(buf, len);
2674 else {
2675 ret = buffer;
2676 buffer = NULL;
2677 max = XML_MAX_NAMELEN;
2678 }
2679
2680
2681 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002682 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002683 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002684 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002685 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002686 }
Owen Taylor3473f882001-02-23 17:55:21 +00002687 len = 0;
2688
Daniel Veillardbb284f42002-10-16 18:02:47 +00002689 /*
2690 * Check that the first character is proper to start
2691 * a new name
2692 */
2693 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2694 ((c >= 0x41) && (c <= 0x5A)) ||
2695 (c == '_') || (c == ':'))) {
2696 int l;
2697 int first = CUR_SCHAR(cur, l);
2698
2699 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002700 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002701 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002702 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002703 }
2704 }
2705 cur++;
2706
Owen Taylor3473f882001-02-23 17:55:21 +00002707 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2708 buf[len++] = c;
2709 c = *cur++;
2710 }
2711 if (len >= max) {
2712 /*
2713 * Okay someone managed to make a huge name, so he's ready to pay
2714 * for the processing speed.
2715 */
2716 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002717
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002718 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002719 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002720 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002721 return(NULL);
2722 }
2723 memcpy(buffer, buf, len);
2724 while (c != 0) { /* tested bigname2.xml */
2725 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002726 xmlChar *tmp;
2727
Owen Taylor3473f882001-02-23 17:55:21 +00002728 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002729 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002730 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002731 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002732 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002733 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002734 return(NULL);
2735 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002736 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002737 }
2738 buffer[len++] = c;
2739 c = *cur++;
2740 }
2741 buffer[len] = 0;
2742 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002743
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if (buffer == NULL)
2745 ret = xmlStrndup(buf, len);
2746 else {
2747 ret = buffer;
2748 }
2749 }
2750
2751 return(ret);
2752}
2753
2754/************************************************************************
2755 * *
2756 * The parser itself *
2757 * Relates to http://www.w3.org/TR/REC-xml *
2758 * *
2759 ************************************************************************/
2760
/*
 * Forward declarations of file-local helpers. xmlParseNameComplex() is
 * called from xmlParseName() before its definition appears in the file.
 */
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002764
Owen Taylor3473f882001-02-23 17:55:21 +00002765/**
2766 * xmlParseName:
2767 * @ctxt: an XML parser context
2768 *
2769 * parse an XML name.
2770 *
2771 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2772 * CombiningChar | Extender
2773 *
2774 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2775 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002776 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002777 *
2778 * Returns the Name parsed or NULL
2779 */
2780
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002781const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002782xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002783 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002784 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002785 int count = 0;
2786
2787 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002788
2789 /*
2790 * Accelerator for simple ASCII names
2791 */
2792 in = ctxt->input->cur;
2793 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2794 ((*in >= 0x41) && (*in <= 0x5A)) ||
2795 (*in == '_') || (*in == ':')) {
2796 in++;
2797 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2798 ((*in >= 0x41) && (*in <= 0x5A)) ||
2799 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002800 (*in == '_') || (*in == '-') ||
2801 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002802 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002803 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002804 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002805 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002806 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002807 ctxt->nbChars += count;
2808 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002809 if (ret == NULL)
2810 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002811 return(ret);
2812 }
2813 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002814 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002815}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002816
Daniel Veillard46de64e2002-05-29 08:21:33 +00002817/**
2818 * xmlParseNameAndCompare:
2819 * @ctxt: an XML parser context
2820 *
2821 * parse an XML name and compares for match
2822 * (specialized for endtag parsing)
2823 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002824 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2825 * and the name for mismatch
2826 */
2827
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002828static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002829xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002830 register const xmlChar *cmp = other;
2831 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002832 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002833
2834 GROW;
2835
2836 in = ctxt->input->cur;
2837 while (*in != 0 && *in == *cmp) {
2838 ++in;
2839 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002840 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002841 }
William M. Brack76e95df2003-10-18 16:20:14 +00002842 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002843 /* success */
2844 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002845 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002846 }
2847 /* failure (or end of input buffer), check with full function */
2848 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002849 /* strings coming from the dictionnary direct compare possible */
2850 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002851 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002852 }
2853 return ret;
2854}
2855
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002856static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002857xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002858 int len = 0, l;
2859 int c;
2860 int count = 0;
2861
2862 /*
2863 * Handler for more complex cases
2864 */
2865 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002866 c = CUR_CHAR(l);
2867 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2868 (!IS_LETTER(c) && (c != '_') &&
2869 (c != ':'))) {
2870 return(NULL);
2871 }
2872
2873 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002874 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002875 (c == '.') || (c == '-') ||
2876 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002877 (IS_COMBINING(c)) ||
2878 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002879 if (count++ > 100) {
2880 count = 0;
2881 GROW;
2882 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002883 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002884 NEXTL(l);
2885 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
Daniel Veillard96688262005-08-23 18:14:12 +00002887 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2888 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002889 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002890}
2891
2892/**
2893 * xmlParseStringName:
2894 * @ctxt: an XML parser context
2895 * @str: a pointer to the string pointer (IN/OUT)
2896 *
2897 * parse an XML name.
2898 *
2899 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2900 * CombiningChar | Extender
2901 *
2902 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2903 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002904 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002905 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002906 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002907 * is updated to the current location in the string.
2908 */
2909
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002910static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002911xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2912 xmlChar buf[XML_MAX_NAMELEN + 5];
2913 const xmlChar *cur = *str;
2914 int len = 0, l;
2915 int c;
2916
2917 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002918 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002919 (c != ':')) {
2920 return(NULL);
2921 }
2922
William M. Brack871611b2003-10-18 04:53:14 +00002923 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002924 (c == '.') || (c == '-') ||
2925 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002926 (IS_COMBINING(c)) ||
2927 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002928 COPY_BUF(l,buf,len,c);
2929 cur += l;
2930 c = CUR_SCHAR(cur, l);
2931 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2932 /*
2933 * Okay someone managed to make a huge name, so he's ready to pay
2934 * for the processing speed.
2935 */
2936 xmlChar *buffer;
2937 int max = len * 2;
2938
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002939 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002940 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002941 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002942 return(NULL);
2943 }
2944 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002946 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002947 (c == '.') || (c == '-') ||
2948 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002949 (IS_COMBINING(c)) ||
2950 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002951 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002952 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002953 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002954 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002957 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002958 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002959 return(NULL);
2960 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002961 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002962 }
2963 COPY_BUF(l,buffer,len,c);
2964 cur += l;
2965 c = CUR_SCHAR(cur, l);
2966 }
2967 buffer[len] = 0;
2968 *str = cur;
2969 return(buffer);
2970 }
2971 }
2972 *str = cur;
2973 return(xmlStrndup(buf, len));
2974}
2975
2976/**
2977 * xmlParseNmtoken:
2978 * @ctxt: an XML parser context
2979 *
2980 * parse an XML Nmtoken.
2981 *
2982 * [7] Nmtoken ::= (NameChar)+
2983 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002984 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002985 *
2986 * Returns the Nmtoken parsed or NULL
2987 */
2988
2989xmlChar *
2990xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2991 xmlChar buf[XML_MAX_NAMELEN + 5];
2992 int len = 0, l;
2993 int c;
2994 int count = 0;
2995
2996 GROW;
2997 c = CUR_CHAR(l);
2998
William M. Brack871611b2003-10-18 04:53:14 +00002999 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003000 (c == '.') || (c == '-') ||
3001 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003002 (IS_COMBINING(c)) ||
3003 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (count++ > 100) {
3005 count = 0;
3006 GROW;
3007 }
3008 COPY_BUF(l,buf,len,c);
3009 NEXTL(l);
3010 c = CUR_CHAR(l);
3011 if (len >= XML_MAX_NAMELEN) {
3012 /*
3013 * Okay someone managed to make a huge token, so he's ready to pay
3014 * for the processing speed.
3015 */
3016 xmlChar *buffer;
3017 int max = len * 2;
3018
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003019 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003020 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003021 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003022 return(NULL);
3023 }
3024 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003025 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003026 (c == '.') || (c == '-') ||
3027 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003028 (IS_COMBINING(c)) ||
3029 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003030 if (count++ > 100) {
3031 count = 0;
3032 GROW;
3033 }
3034 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003035 xmlChar *tmp;
3036
Owen Taylor3473f882001-02-23 17:55:21 +00003037 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003039 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003040 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003041 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003042 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 return(NULL);
3044 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003045 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 }
3047 COPY_BUF(l,buffer,len,c);
3048 NEXTL(l);
3049 c = CUR_CHAR(l);
3050 }
3051 buffer[len] = 0;
3052 return(buffer);
3053 }
3054 }
3055 if (len == 0)
3056 return(NULL);
3057 return(xmlStrndup(buf, len));
3058}
3059
3060/**
3061 * xmlParseEntityValue:
3062 * @ctxt: an XML parser context
3063 * @orig: if non-NULL store a copy of the original entity value
3064 *
3065 * parse a value for ENTITY declarations
3066 *
3067 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3068 * "'" ([^%&'] | PEReference | Reference)* "'"
3069 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003070 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003071 */
3072
3073xmlChar *
3074xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3075 xmlChar *buf = NULL;
3076 int len = 0;
3077 int size = XML_PARSER_BUFFER_SIZE;
3078 int c, l;
3079 xmlChar stop;
3080 xmlChar *ret = NULL;
3081 const xmlChar *cur = NULL;
3082 xmlParserInputPtr input;
3083
3084 if (RAW == '"') stop = '"';
3085 else if (RAW == '\'') stop = '\'';
3086 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003087 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 return(NULL);
3089 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003090 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003091 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003092 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003093 return(NULL);
3094 }
3095
3096 /*
3097 * The content of the entity definition is copied in a buffer.
3098 */
3099
3100 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3101 input = ctxt->input;
3102 GROW;
3103 NEXT;
3104 c = CUR_CHAR(l);
3105 /*
3106 * NOTE: 4.4.5 Included in Literal
3107 * When a parameter entity reference appears in a literal entity
3108 * value, ... a single or double quote character in the replacement
3109 * text is always treated as a normal data character and will not
3110 * terminate the literal.
3111 * In practice it means we stop the loop only when back at parsing
3112 * the initial entity and the quote is found
3113 */
William M. Brack871611b2003-10-18 04:53:14 +00003114 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003115 (ctxt->input != input))) {
3116 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003117 xmlChar *tmp;
3118
Owen Taylor3473f882001-02-23 17:55:21 +00003119 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003120 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3121 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003122 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003123 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003124 return(NULL);
3125 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003126 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 }
3128 COPY_BUF(l,buf,len,c);
3129 NEXTL(l);
3130 /*
3131 * Pop-up of finished entities.
3132 */
3133 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3134 xmlPopInput(ctxt);
3135
3136 GROW;
3137 c = CUR_CHAR(l);
3138 if (c == 0) {
3139 GROW;
3140 c = CUR_CHAR(l);
3141 }
3142 }
3143 buf[len] = 0;
3144
3145 /*
3146 * Raise problem w.r.t. '&' and '%' being used in non-entities
3147 * reference constructs. Note Charref will be handled in
3148 * xmlStringDecodeEntities()
3149 */
3150 cur = buf;
3151 while (*cur != 0) { /* non input consuming */
3152 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3153 xmlChar *name;
3154 xmlChar tmp = *cur;
3155
3156 cur++;
3157 name = xmlParseStringName(ctxt, &cur);
3158 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003159 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003160 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003161 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003162 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003163 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3164 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003165 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003166 }
3167 if (name != NULL)
3168 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003169 if (*cur == 0)
3170 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003171 }
3172 cur++;
3173 }
3174
3175 /*
3176 * Then PEReference entities are substituted.
3177 */
3178 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003179 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003180 xmlFree(buf);
3181 } else {
3182 NEXT;
3183 /*
3184 * NOTE: 4.4.7 Bypassed
3185 * When a general entity reference appears in the EntityValue in
3186 * an entity declaration, it is bypassed and left as is.
3187 * so XML_SUBSTITUTE_REF is not set here.
3188 */
3189 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3190 0, 0, 0);
3191 if (orig != NULL)
3192 *orig = buf;
3193 else
3194 xmlFree(buf);
3195 }
3196
3197 return(ret);
3198}
3199
3200/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003201 * xmlParseAttValueComplex:
3202 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003203 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003205 *
3206 * parse a value for an attribute, this is the fallback function
3207 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003208 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003209 *
3210 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3211 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003212static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003213xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003214 xmlChar limit = 0;
3215 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003216 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003217 int len = 0;
3218 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003219 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003220 xmlChar *current = NULL;
3221 xmlEntityPtr ent;
3222
Owen Taylor3473f882001-02-23 17:55:21 +00003223 if (NXT(0) == '"') {
3224 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3225 limit = '"';
3226 NEXT;
3227 } else if (NXT(0) == '\'') {
3228 limit = '\'';
3229 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3230 NEXT;
3231 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003232 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003233 return(NULL);
3234 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003235
Owen Taylor3473f882001-02-23 17:55:21 +00003236 /*
3237 * allocate a translation buffer.
3238 */
3239 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003240 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003241 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003242
3243 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003244 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003245 */
3246 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003247 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003248 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003249 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003250 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003251 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003252 if (NXT(1) == '#') {
3253 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003254
Owen Taylor3473f882001-02-23 17:55:21 +00003255 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003256 if (ctxt->replaceEntities) {
3257 if (len > buf_size - 10) {
3258 growBuffer(buf);
3259 }
3260 buf[len++] = '&';
3261 } else {
3262 /*
3263 * The reparsing will be done in xmlStringGetNodeList()
3264 * called by the attribute() function in SAX.c
3265 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003266 if (len > buf_size - 10) {
3267 growBuffer(buf);
3268 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003269 buf[len++] = '&';
3270 buf[len++] = '#';
3271 buf[len++] = '3';
3272 buf[len++] = '8';
3273 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003274 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003275 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003276 if (len > buf_size - 10) {
3277 growBuffer(buf);
3278 }
Owen Taylor3473f882001-02-23 17:55:21 +00003279 len += xmlCopyChar(0, &buf[len], val);
3280 }
3281 } else {
3282 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003283 if ((ent != NULL) &&
3284 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3285 if (len > buf_size - 10) {
3286 growBuffer(buf);
3287 }
3288 if ((ctxt->replaceEntities == 0) &&
3289 (ent->content[0] == '&')) {
3290 buf[len++] = '&';
3291 buf[len++] = '#';
3292 buf[len++] = '3';
3293 buf[len++] = '8';
3294 buf[len++] = ';';
3295 } else {
3296 buf[len++] = ent->content[0];
3297 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003298 } else if ((ent != NULL) &&
3299 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003300 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3301 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003302 XML_SUBSTITUTE_REF,
3303 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (rep != NULL) {
3305 current = rep;
3306 while (*current != 0) { /* non input consuming */
3307 buf[len++] = *current++;
3308 if (len > buf_size - 10) {
3309 growBuffer(buf);
3310 }
3311 }
3312 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003313 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003316 if (len > buf_size - 10) {
3317 growBuffer(buf);
3318 }
Owen Taylor3473f882001-02-23 17:55:21 +00003319 if (ent->content != NULL)
3320 buf[len++] = ent->content[0];
3321 }
3322 } else if (ent != NULL) {
3323 int i = xmlStrlen(ent->name);
3324 const xmlChar *cur = ent->name;
3325
3326 /*
3327 * This may look absurd but is needed to detect
3328 * entities problems
3329 */
3330 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3331 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003332 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003333 XML_SUBSTITUTE_REF, 0, 0, 0);
3334 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003336 rep = NULL;
3337 }
Owen Taylor3473f882001-02-23 17:55:21 +00003338 }
3339
3340 /*
3341 * Just output the reference
3342 */
3343 buf[len++] = '&';
3344 if (len > buf_size - i - 10) {
3345 growBuffer(buf);
3346 }
3347 for (;i > 0;i--)
3348 buf[len++] = *cur++;
3349 buf[len++] = ';';
3350 }
3351 }
3352 } else {
3353 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003354 if ((len != 0) || (!normalize)) {
3355 if ((!normalize) || (!in_space)) {
3356 COPY_BUF(l,buf,len,0x20);
3357 if (len > buf_size - 10) {
3358 growBuffer(buf);
3359 }
3360 }
3361 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 }
3363 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003364 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003365 COPY_BUF(l,buf,len,c);
3366 if (len > buf_size - 10) {
3367 growBuffer(buf);
3368 }
3369 }
3370 NEXTL(l);
3371 }
3372 GROW;
3373 c = CUR_CHAR(l);
3374 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003375 if ((in_space) && (normalize)) {
3376 while (buf[len - 1] == 0x20) len--;
3377 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003378 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003380 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003382 if ((c != 0) && (!IS_CHAR(c))) {
3383 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3384 "invalid character in attribute value\n");
3385 } else {
3386 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3387 "AttValue: ' expected\n");
3388 }
Owen Taylor3473f882001-02-23 17:55:21 +00003389 } else
3390 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003391 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003393
3394mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003395 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003396 if (buf != NULL)
3397 xmlFree(buf);
3398 if (rep != NULL)
3399 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003400 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003401}
3402
3403/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003404 * xmlParseAttValue:
3405 * @ctxt: an XML parser context
3406 *
3407 * parse a value for an attribute
3408 * Note: the parser won't do substitution of entities here, this
3409 * will be handled later in xmlStringGetNodeList
3410 *
3411 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3412 * "'" ([^<&'] | Reference)* "'"
3413 *
3414 * 3.3.3 Attribute-Value Normalization:
3415 * Before the value of an attribute is passed to the application or
3416 * checked for validity, the XML processor must normalize it as follows:
3417 * - a character reference is processed by appending the referenced
3418 * character to the attribute value
3419 * - an entity reference is processed by recursively processing the
3420 * replacement text of the entity
3421 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3422 * appending #x20 to the normalized value, except that only a single
3423 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3424 * parsed entity or the literal entity value of an internal parsed entity
3425 * - other characters are processed by appending them to the normalized value
3426 * If the declared value is not CDATA, then the XML processor must further
3427 * process the normalized attribute value by discarding any leading and
3428 * trailing space (#x20) characters, and by replacing sequences of space
3429 * (#x20) characters by a single space (#x20) character.
3430 * All attributes for which no declaration has been read should be treated
3431 * by a non-validating parser as if declared CDATA.
3432 *
3433 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3434 */
3435
3436
3437xmlChar *
3438xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003439 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003440 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003441}
3442
3443/**
Owen Taylor3473f882001-02-23 17:55:21 +00003444 * xmlParseSystemLiteral:
3445 * @ctxt: an XML parser context
3446 *
3447 * parse an XML Literal
3448 *
3449 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3450 *
3451 * Returns the SystemLiteral parsed or NULL
3452 */
3453
3454xmlChar *
3455xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3456 xmlChar *buf = NULL;
3457 int len = 0;
3458 int size = XML_PARSER_BUFFER_SIZE;
3459 int cur, l;
3460 xmlChar stop;
3461 int state = ctxt->instate;
3462 int count = 0;
3463
3464 SHRINK;
3465 if (RAW == '"') {
3466 NEXT;
3467 stop = '"';
3468 } else if (RAW == '\'') {
3469 NEXT;
3470 stop = '\'';
3471 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003472 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003473 return(NULL);
3474 }
3475
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003476 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003477 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003478 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003479 return(NULL);
3480 }
3481 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3482 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003483 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003484 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003485 xmlChar *tmp;
3486
Owen Taylor3473f882001-02-23 17:55:21 +00003487 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003488 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3489 if (tmp == NULL) {
3490 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003491 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003492 ctxt->instate = (xmlParserInputState) state;
3493 return(NULL);
3494 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003495 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003496 }
3497 count++;
3498 if (count > 50) {
3499 GROW;
3500 count = 0;
3501 }
3502 COPY_BUF(l,buf,len,cur);
3503 NEXTL(l);
3504 cur = CUR_CHAR(l);
3505 if (cur == 0) {
3506 GROW;
3507 SHRINK;
3508 cur = CUR_CHAR(l);
3509 }
3510 }
3511 buf[len] = 0;
3512 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003513 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003514 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003515 } else {
3516 NEXT;
3517 }
3518 return(buf);
3519}
3520
3521/**
3522 * xmlParsePubidLiteral:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse an XML public literal
3526 *
3527 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3528 *
3529 * Returns the PubidLiteral parsed or NULL.
3530 */
3531
3532xmlChar *
3533xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3534 xmlChar *buf = NULL;
3535 int len = 0;
3536 int size = XML_PARSER_BUFFER_SIZE;
3537 xmlChar cur;
3538 xmlChar stop;
3539 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003540 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003541
3542 SHRINK;
3543 if (RAW == '"') {
3544 NEXT;
3545 stop = '"';
3546 } else if (RAW == '\'') {
3547 NEXT;
3548 stop = '\'';
3549 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003550 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003551 return(NULL);
3552 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003553 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003554 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003555 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003556 return(NULL);
3557 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003558 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003560 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003561 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003562 xmlChar *tmp;
3563
Owen Taylor3473f882001-02-23 17:55:21 +00003564 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003565 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3566 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003567 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003568 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 return(NULL);
3570 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003571 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003572 }
3573 buf[len++] = cur;
3574 count++;
3575 if (count > 50) {
3576 GROW;
3577 count = 0;
3578 }
3579 NEXT;
3580 cur = CUR;
3581 if (cur == 0) {
3582 GROW;
3583 SHRINK;
3584 cur = CUR;
3585 }
3586 }
3587 buf[len] = 0;
3588 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003589 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003590 } else {
3591 NEXT;
3592 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003593 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003594 return(buf);
3595}
3596
/* Forward declaration: xmlParseCharData() below falls back to it. */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing (indexed by the input byte in xmlParseCharData()).
 * An entry is non-zero when the scan may step over that byte:
 * 0x09 and 0x20..0x7F, except '&', '<' and ']'.
 * All other entries are zero and stop the scan: the remaining control
 * characters (including 0xA and 0xD), '&', '<', ']' and every byte
 * >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3636
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input bytes directly and
 * hands runs of them to the SAX callbacks without copying. When @cdata
 * is non-zero, or when the fast path meets a byte it does not handle,
 * the work is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position matching ctxt->input->cur; it is written back before the
     * fallback call because the fast path advances the line/col counters
     * while scanning ahead of cur.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Step over a leading run of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Everything from cur up to the '<' is spaces and line
                 * feeds: deliver it (if any) and return.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * areBlanks() is only consulted when the two
                     * callbacks differ; its result selects which one
                     * receives the run.
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Step over every byte flagged in test_char_data, counting
             * columns in a local and storing the total once afterwards.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary data: keep scanning. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the run [cur, in) collected so far. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                /*
                 * Only a run that starts with a blank is tested with
                 * areBlanks(); any other run goes to characters().
                 */
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* The run is consumed: remember the new position. */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    /* The run is consumed: remember the new position. */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: cur is left on the LF so that the CR is
                     * skipped and the LF starts the next run.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* A CR not followed by LF is not handled here. */
                in--;
            }
            /* Markup or a reference starts here: the caller takes over. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* Re-read cur after SHRINK/GROW before using the pointer. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fallback: put back the position of the last consumed byte and let
     * the complex parser handle what follows.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
3789
Daniel Veillard01c13b52002-12-10 15:19:08 +00003790/**
3791 * xmlParseCharDataComplex:
3792 * @ctxt: an XML parser context
3793 * @cdata: int indicating whether we are within a CDATA section
3794 *
3795 * parse a CharData section.this is the fallback function
3796 * of xmlParseCharData() when the parsing requires handling
3797 * of non-ASCII characters.
3798 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003799void
3800xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003801 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3802 int nbchar = 0;
3803 int cur, l;
3804 int count = 0;
3805
3806 SHRINK;
3807 GROW;
3808 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003809 while ((cur != '<') && /* checked */
3810 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003811 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003812 if ((cur == ']') && (NXT(1) == ']') &&
3813 (NXT(2) == '>')) {
3814 if (cdata) break;
3815 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003816 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003817 }
3818 }
3819 COPY_BUF(l,buf,nbchar,cur);
3820 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003821 buf[nbchar] = 0;
3822
Owen Taylor3473f882001-02-23 17:55:21 +00003823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003824 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003825 */
3826 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003827 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003828 if (ctxt->sax->ignorableWhitespace != NULL)
3829 ctxt->sax->ignorableWhitespace(ctxt->userData,
3830 buf, nbchar);
3831 } else {
3832 if (ctxt->sax->characters != NULL)
3833 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003834 if ((ctxt->sax->characters !=
3835 ctxt->sax->ignorableWhitespace) &&
3836 (*ctxt->space == -1))
3837 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 }
3839 }
3840 nbchar = 0;
3841 }
3842 count++;
3843 if (count > 50) {
3844 GROW;
3845 count = 0;
3846 }
3847 NEXTL(l);
3848 cur = CUR_CHAR(l);
3849 }
3850 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003851 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003852 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003853 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003854 */
3855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003856 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (ctxt->sax->ignorableWhitespace != NULL)
3858 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3859 } else {
3860 if (ctxt->sax->characters != NULL)
3861 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003862 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3863 (*ctxt->space == -1))
3864 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
3866 }
3867 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003868 if ((cur != 0) && (!IS_CHAR(cur))) {
3869 /* Generate the error and skip the offending character */
3870 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3871 "PCDATA invalid Char value %d\n",
3872 cur);
3873 NEXTL(l);
3874 }
Owen Taylor3473f882001-02-23 17:55:21 +00003875}
3876
3877/**
3878 * xmlParseExternalID:
3879 * @ctxt: an XML parser context
3880 * @publicID: a xmlChar** receiving PubidLiteral
3881 * @strict: indicate whether we should restrict parsing to only
3882 * production [75], see NOTE below
3883 *
3884 * Parse an External ID or a Public ID
3885 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003886 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003887 * 'PUBLIC' S PubidLiteral S SystemLiteral
3888 *
3889 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3890 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3891 *
3892 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3893 *
3894 * Returns the function returns SystemLiteral and in the second
3895 * case publicID receives PubidLiteral, is strict is off
3896 * it is possible to return NULL and have publicID set.
3897 */
3898
3899xmlChar *
3900xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3901 xmlChar *URI = NULL;
3902
3903 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003904
3905 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003906 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003907 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003908 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3910 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003911 }
3912 SKIP_BLANKS;
3913 URI = xmlParseSystemLiteral(ctxt);
3914 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003915 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003916 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003917 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003918 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003921 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003922 }
3923 SKIP_BLANKS;
3924 *publicID = xmlParsePubidLiteral(ctxt);
3925 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003926 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 }
3928 if (strict) {
3929 /*
3930 * We don't handle [83] so "S SystemLiteral" is required.
3931 */
William M. Brack76e95df2003-10-18 16:20:14 +00003932 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003934 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003935 }
3936 } else {
3937 /*
3938 * We handle [83] so we return immediately, if
3939 * "S SystemLiteral" is not detected. From a purely parsing
3940 * point of view that's a nice mess.
3941 */
3942 const xmlChar *ptr;
3943 GROW;
3944
3945 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003946 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003947
William M. Brack76e95df2003-10-18 16:20:14 +00003948 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003949 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3950 }
3951 SKIP_BLANKS;
3952 URI = xmlParseSystemLiteral(ctxt);
3953 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003954 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003955 }
3956 }
3957 return(URI);
3958}
3959
3960/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003961 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003962 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003963 * @buf: the already parsed part of the buffer
3964 * @len: number of bytes filles in the buffer
3965 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003966 *
3967 * Skip an XML (SGML) comment <!-- .... -->
3968 * The spec says that "For compatibility, the string "--" (double-hyphen)
3969 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003970 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003971 *
3972 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3973 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003974static void
3975xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003976 int q, ql;
3977 int r, rl;
3978 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003979 xmlParserInputPtr input = ctxt->input;
3980 int count = 0;
3981
Owen Taylor3473f882001-02-23 17:55:21 +00003982 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003983 len = 0;
3984 size = XML_PARSER_BUFFER_SIZE;
3985 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3986 if (buf == NULL) {
3987 xmlErrMemory(ctxt, NULL);
3988 return;
3989 }
Owen Taylor3473f882001-02-23 17:55:21 +00003990 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003991 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003992 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003993 if (q == 0)
3994 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003995 if (!IS_CHAR(q)) {
3996 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3997 "xmlParseComment: invalid xmlChar value %d\n",
3998 q);
3999 xmlFree (buf);
4000 return;
4001 }
Owen Taylor3473f882001-02-23 17:55:21 +00004002 NEXTL(ql);
4003 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004004 if (r == 0)
4005 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004006 if (!IS_CHAR(r)) {
4007 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4008 "xmlParseComment: invalid xmlChar value %d\n",
4009 q);
4010 xmlFree (buf);
4011 return;
4012 }
Owen Taylor3473f882001-02-23 17:55:21 +00004013 NEXTL(rl);
4014 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004015 if (cur == 0)
4016 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004017 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004018 ((cur != '>') ||
4019 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004020 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004021 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004022 }
4023 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004024 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004025 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004026 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4027 if (new_buf == NULL) {
4028 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004029 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004030 return;
4031 }
William M. Bracka3215c72004-07-31 16:24:01 +00004032 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004033 }
4034 COPY_BUF(ql,buf,len,q);
4035 q = r;
4036 ql = rl;
4037 r = cur;
4038 rl = l;
4039
4040 count++;
4041 if (count > 50) {
4042 GROW;
4043 count = 0;
4044 }
4045 NEXTL(l);
4046 cur = CUR_CHAR(l);
4047 if (cur == 0) {
4048 SHRINK;
4049 GROW;
4050 cur = CUR_CHAR(l);
4051 }
4052 }
4053 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004054 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004055 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004056 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004057 } else if (!IS_CHAR(cur)) {
4058 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4059 "xmlParseComment: invalid xmlChar value %d\n",
4060 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004061 } else {
4062 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004063 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4064 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004065 }
4066 NEXT;
4067 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4068 (!ctxt->disableSAX))
4069 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
Daniel Veillardda629342007-08-01 07:49:06 +00004071 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004072 return;
4073not_terminated:
4074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4075 "Comment not terminated\n", NULL);
4076 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004077 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004078}
Daniel Veillardda629342007-08-01 07:49:06 +00004079
Daniel Veillard4c778d82005-01-23 17:37:44 +00004080/**
4081 * xmlParseComment:
4082 * @ctxt: an XML parser context
4083 *
4084 * Skip an XML (SGML) comment <!-- .... -->
4085 * The spec says that "For compatibility, the string "--" (double-hyphen)
4086 * must not occur within comments. "
4087 *
4088 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4089 */
4090void
4091xmlParseComment(xmlParserCtxtPtr ctxt) {
4092 xmlChar *buf = NULL;
4093 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004094 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004095 xmlParserInputState state;
4096 const xmlChar *in;
4097 int nbchar = 0, ccol;
4098
4099 /*
4100 * Check that there is a comment right here.
4101 */
4102 if ((RAW != '<') || (NXT(1) != '!') ||
4103 (NXT(2) != '-') || (NXT(3) != '-')) return;
4104
4105 state = ctxt->instate;
4106 ctxt->instate = XML_PARSER_COMMENT;
4107 SKIP(4);
4108 SHRINK;
4109 GROW;
4110
4111 /*
4112 * Accelerated common case where input don't need to be
4113 * modified before passing it to the handler.
4114 */
4115 in = ctxt->input->cur;
4116 do {
4117 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004118 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004119 ctxt->input->line++; ctxt->input->col = 1;
4120 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004121 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004122 }
4123get_more:
4124 ccol = ctxt->input->col;
4125 while (((*in > '-') && (*in <= 0x7F)) ||
4126 ((*in >= 0x20) && (*in < '-')) ||
4127 (*in == 0x09)) {
4128 in++;
4129 ccol++;
4130 }
4131 ctxt->input->col = ccol;
4132 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004133 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004134 ctxt->input->line++; ctxt->input->col = 1;
4135 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004136 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004137 goto get_more;
4138 }
4139 nbchar = in - ctxt->input->cur;
4140 /*
4141 * save current set of data
4142 */
4143 if (nbchar > 0) {
4144 if ((ctxt->sax != NULL) &&
4145 (ctxt->sax->comment != NULL)) {
4146 if (buf == NULL) {
4147 if ((*in == '-') && (in[1] == '-'))
4148 size = nbchar + 1;
4149 else
4150 size = XML_PARSER_BUFFER_SIZE + nbchar;
4151 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4152 if (buf == NULL) {
4153 xmlErrMemory(ctxt, NULL);
4154 ctxt->instate = state;
4155 return;
4156 }
4157 len = 0;
4158 } else if (len + nbchar + 1 >= size) {
4159 xmlChar *new_buf;
4160 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4161 new_buf = (xmlChar *) xmlRealloc(buf,
4162 size * sizeof(xmlChar));
4163 if (new_buf == NULL) {
4164 xmlFree (buf);
4165 xmlErrMemory(ctxt, NULL);
4166 ctxt->instate = state;
4167 return;
4168 }
4169 buf = new_buf;
4170 }
4171 memcpy(&buf[len], ctxt->input->cur, nbchar);
4172 len += nbchar;
4173 buf[len] = 0;
4174 }
4175 }
4176 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004177 if (*in == 0xA) {
4178 in++;
4179 ctxt->input->line++; ctxt->input->col = 1;
4180 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004181 if (*in == 0xD) {
4182 in++;
4183 if (*in == 0xA) {
4184 ctxt->input->cur = in;
4185 in++;
4186 ctxt->input->line++; ctxt->input->col = 1;
4187 continue; /* while */
4188 }
4189 in--;
4190 }
4191 SHRINK;
4192 GROW;
4193 in = ctxt->input->cur;
4194 if (*in == '-') {
4195 if (in[1] == '-') {
4196 if (in[2] == '>') {
4197 SKIP(3);
4198 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4199 (!ctxt->disableSAX)) {
4200 if (buf != NULL)
4201 ctxt->sax->comment(ctxt->userData, buf);
4202 else
4203 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4204 }
4205 if (buf != NULL)
4206 xmlFree(buf);
4207 ctxt->instate = state;
4208 return;
4209 }
4210 if (buf != NULL)
4211 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4212 "Comment not terminated \n<!--%.50s\n",
4213 buf);
4214 else
4215 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4216 "Comment not terminated \n", NULL);
4217 in++;
4218 ctxt->input->col++;
4219 }
4220 in++;
4221 ctxt->input->col++;
4222 goto get_more;
4223 }
4224 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4225 xmlParseCommentComplex(ctxt, buf, len, size);
4226 ctxt->instate = state;
4227 return;
4228}
4229
Owen Taylor3473f882001-02-23 17:55:21 +00004230
4231/**
4232 * xmlParsePITarget:
4233 * @ctxt: an XML parser context
4234 *
4235 * parse the name of a PI
4236 *
4237 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4238 *
4239 * Returns the PITarget name or NULL
4240 */
4241
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004242const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004243xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004244 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004245
4246 name = xmlParseName(ctxt);
4247 if ((name != NULL) &&
4248 ((name[0] == 'x') || (name[0] == 'X')) &&
4249 ((name[1] == 'm') || (name[1] == 'M')) &&
4250 ((name[2] == 'l') || (name[2] == 'L'))) {
4251 int i;
4252 if ((name[0] == 'x') && (name[1] == 'm') &&
4253 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004254 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004255 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004256 return(name);
4257 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004258 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 return(name);
4260 }
4261 for (i = 0;;i++) {
4262 if (xmlW3CPIs[i] == NULL) break;
4263 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4264 return(name);
4265 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004266 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4267 "xmlParsePITarget: invalid name prefix 'xml'\n",
4268 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 }
4270 return(name);
4271}
4272
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing: that case is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK_CH(*p)) p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* the '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*p)) p++;
    if (*p != '=')
        return;
    p++;

    /* the quoted value, closed by the same quote that opened it */
    while (IS_BLANK_CH(*p)) p++;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p)) p++;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4334
Owen Taylor3473f882001-02-23 17:55:21 +00004335/**
4336 * xmlParsePI:
4337 * @ctxt: an XML parser context
4338 *
4339 * parse an XML Processing Instruction.
4340 *
4341 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4342 *
4343 * The processing is transfered to SAX once parsed.
4344 */
4345
4346void
4347xmlParsePI(xmlParserCtxtPtr ctxt) {
4348 xmlChar *buf = NULL;
4349 int len = 0;
4350 int size = XML_PARSER_BUFFER_SIZE;
4351 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004352 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004353 xmlParserInputState state;
4354 int count = 0;
4355
4356 if ((RAW == '<') && (NXT(1) == '?')) {
4357 xmlParserInputPtr input = ctxt->input;
4358 state = ctxt->instate;
4359 ctxt->instate = XML_PARSER_PI;
4360 /*
4361 * this is a Processing Instruction.
4362 */
4363 SKIP(2);
4364 SHRINK;
4365
4366 /*
4367 * Parse the target name and check for special support like
4368 * namespace.
4369 */
4370 target = xmlParsePITarget(ctxt);
4371 if (target != NULL) {
4372 if ((RAW == '?') && (NXT(1) == '>')) {
4373 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004374 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4375 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004376 }
4377 SKIP(2);
4378
4379 /*
4380 * SAX: PI detected.
4381 */
4382 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4383 (ctxt->sax->processingInstruction != NULL))
4384 ctxt->sax->processingInstruction(ctxt->userData,
4385 target, NULL);
4386 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004387 return;
4388 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004389 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004391 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004392 ctxt->instate = state;
4393 return;
4394 }
4395 cur = CUR;
4396 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004397 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4398 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
4400 SKIP_BLANKS;
4401 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004402 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004403 ((cur != '?') || (NXT(1) != '>'))) {
4404 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004405 xmlChar *tmp;
4406
Owen Taylor3473f882001-02-23 17:55:21 +00004407 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004408 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4409 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004410 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004411 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004412 ctxt->instate = state;
4413 return;
4414 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004415 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 }
4417 count++;
4418 if (count > 50) {
4419 GROW;
4420 count = 0;
4421 }
4422 COPY_BUF(l,buf,len,cur);
4423 NEXTL(l);
4424 cur = CUR_CHAR(l);
4425 if (cur == 0) {
4426 SHRINK;
4427 GROW;
4428 cur = CUR_CHAR(l);
4429 }
4430 }
4431 buf[len] = 0;
4432 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004433 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4434 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004435 } else {
4436 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4438 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004439 }
4440 SKIP(2);
4441
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004442#ifdef LIBXML_CATALOG_ENABLED
4443 if (((state == XML_PARSER_MISC) ||
4444 (state == XML_PARSER_START)) &&
4445 (xmlStrEqual(target, XML_CATALOG_PI))) {
4446 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4447 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4448 (allow == XML_CATA_ALLOW_ALL))
4449 xmlParseCatalogPI(ctxt, buf);
4450 }
4451#endif
4452
4453
Owen Taylor3473f882001-02-23 17:55:21 +00004454 /*
4455 * SAX: PI detected.
4456 */
4457 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4458 (ctxt->sax->processingInstruction != NULL))
4459 ctxt->sax->processingInstruction(ctxt->userData,
4460 target, buf);
4461 }
4462 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004463 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004464 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004465 }
4466 ctxt->instate = state;
4467 }
4468}
4469
4470/**
4471 * xmlParseNotationDecl:
4472 * @ctxt: an XML parser context
4473 *
4474 * parse a notation declaration
4475 *
4476 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4477 *
4478 * Hence there is actually 3 choices:
4479 * 'PUBLIC' S PubidLiteral
4480 * 'PUBLIC' S PubidLiteral S SystemLiteral
4481 * and 'SYSTEM' S SystemLiteral
4482 *
4483 * See the NOTE on xmlParseExternalID().
4484 */
4485
4486void
4487xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004488 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004489 xmlChar *Pubid;
4490 xmlChar *Systemid;
4491
Daniel Veillarda07050d2003-10-19 14:46:32 +00004492 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004493 xmlParserInputPtr input = ctxt->input;
4494 SHRINK;
4495 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004496 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004499 return;
4500 }
4501 SKIP_BLANKS;
4502
Daniel Veillard76d66f42001-05-16 21:05:17 +00004503 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004504 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004505 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004506 return;
4507 }
William M. Brack76e95df2003-10-18 16:20:14 +00004508 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004510 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004511 return;
4512 }
4513 SKIP_BLANKS;
4514
4515 /*
4516 * Parse the IDs.
4517 */
4518 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4519 SKIP_BLANKS;
4520
4521 if (RAW == '>') {
4522 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004523 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4524 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004525 }
4526 NEXT;
4527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4528 (ctxt->sax->notationDecl != NULL))
4529 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4530 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004531 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004532 }
Owen Taylor3473f882001-02-23 17:55:21 +00004533 if (Systemid != NULL) xmlFree(Systemid);
4534 if (Pubid != NULL) xmlFree(Pubid);
4535 }
4536}
4537
4538/**
4539 * xmlParseEntityDecl:
4540 * @ctxt: an XML parser context
4541 *
4542 * parse <!ENTITY declarations
4543 *
4544 * [70] EntityDecl ::= GEDecl | PEDecl
4545 *
4546 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4547 *
4548 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4549 *
4550 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4551 *
4552 * [74] PEDef ::= EntityValue | ExternalID
4553 *
4554 * [76] NDataDecl ::= S 'NDATA' S Name
4555 *
4556 * [ VC: Notation Declared ]
4557 * The Name must match the declared name of a notation.
4558 */
4559
4560void
4561xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004562 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 xmlChar *value = NULL;
4564 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004565 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004566 int isParameter = 0;
4567 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004568 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004569
Daniel Veillard4c778d82005-01-23 17:37:44 +00004570 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004571 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004572 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 SHRINK;
4574 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004575 skipped = SKIP_BLANKS;
4576 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4578 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004579 }
Owen Taylor3473f882001-02-23 17:55:21 +00004580
4581 if (RAW == '%') {
4582 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004583 skipped = SKIP_BLANKS;
4584 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4586 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004587 }
Owen Taylor3473f882001-02-23 17:55:21 +00004588 isParameter = 1;
4589 }
4590
Daniel Veillard76d66f42001-05-16 21:05:17 +00004591 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004592 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004593 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4594 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004595 return;
4596 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004597 skipped = SKIP_BLANKS;
4598 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4600 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004601 }
Owen Taylor3473f882001-02-23 17:55:21 +00004602
Daniel Veillardf5582f12002-06-11 10:08:16 +00004603 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004604 /*
4605 * handle the various case of definitions...
4606 */
4607 if (isParameter) {
4608 if ((RAW == '"') || (RAW == '\'')) {
4609 value = xmlParseEntityValue(ctxt, &orig);
4610 if (value) {
4611 if ((ctxt->sax != NULL) &&
4612 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4613 ctxt->sax->entityDecl(ctxt->userData, name,
4614 XML_INTERNAL_PARAMETER_ENTITY,
4615 NULL, NULL, value);
4616 }
4617 } else {
4618 URI = xmlParseExternalID(ctxt, &literal, 1);
4619 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 }
4622 if (URI) {
4623 xmlURIPtr uri;
4624
4625 uri = xmlParseURI((const char *) URI);
4626 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004627 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4628 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004629 /*
4630 * This really ought to be a well formedness error
4631 * but the XML Core WG decided otherwise c.f. issue
4632 * E26 of the XML erratas.
4633 */
Owen Taylor3473f882001-02-23 17:55:21 +00004634 } else {
4635 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004636 /*
4637 * Okay this is foolish to block those but not
4638 * invalid URIs.
4639 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004640 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004641 } else {
4642 if ((ctxt->sax != NULL) &&
4643 (!ctxt->disableSAX) &&
4644 (ctxt->sax->entityDecl != NULL))
4645 ctxt->sax->entityDecl(ctxt->userData, name,
4646 XML_EXTERNAL_PARAMETER_ENTITY,
4647 literal, URI, NULL);
4648 }
4649 xmlFreeURI(uri);
4650 }
4651 }
4652 }
4653 } else {
4654 if ((RAW == '"') || (RAW == '\'')) {
4655 value = xmlParseEntityValue(ctxt, &orig);
4656 if ((ctxt->sax != NULL) &&
4657 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4658 ctxt->sax->entityDecl(ctxt->userData, name,
4659 XML_INTERNAL_GENERAL_ENTITY,
4660 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004661 /*
4662 * For expat compatibility in SAX mode.
4663 */
4664 if ((ctxt->myDoc == NULL) ||
4665 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4666 if (ctxt->myDoc == NULL) {
4667 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004668 if (ctxt->myDoc == NULL) {
4669 xmlErrMemory(ctxt, "New Doc failed");
4670 return;
4671 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004672 }
4673 if (ctxt->myDoc->intSubset == NULL)
4674 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4675 BAD_CAST "fake", NULL, NULL);
4676
Daniel Veillard1af9a412003-08-20 22:54:39 +00004677 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4678 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004679 }
Owen Taylor3473f882001-02-23 17:55:21 +00004680 } else {
4681 URI = xmlParseExternalID(ctxt, &literal, 1);
4682 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004683 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
4685 if (URI) {
4686 xmlURIPtr uri;
4687
4688 uri = xmlParseURI((const char *)URI);
4689 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004690 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4691 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004692 /*
4693 * This really ought to be a well formedness error
4694 * but the XML Core WG decided otherwise c.f. issue
4695 * E26 of the XML erratas.
4696 */
Owen Taylor3473f882001-02-23 17:55:21 +00004697 } else {
4698 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004699 /*
4700 * Okay this is foolish to block those but not
4701 * invalid URIs.
4702 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004703 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004704 }
4705 xmlFreeURI(uri);
4706 }
4707 }
William M. Brack76e95df2003-10-18 16:20:14 +00004708 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004709 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4710 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004711 }
4712 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004713 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004714 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004715 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4717 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
4719 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004720 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004721 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4722 (ctxt->sax->unparsedEntityDecl != NULL))
4723 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4724 literal, URI, ndata);
4725 } else {
4726 if ((ctxt->sax != NULL) &&
4727 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4728 ctxt->sax->entityDecl(ctxt->userData, name,
4729 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4730 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004731 /*
4732 * For expat compatibility in SAX mode.
4733 * assuming the entity replacement was asked for
4734 */
4735 if ((ctxt->replaceEntities != 0) &&
4736 ((ctxt->myDoc == NULL) ||
4737 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4738 if (ctxt->myDoc == NULL) {
4739 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004740 if (ctxt->myDoc == NULL) {
4741 xmlErrMemory(ctxt, "New Doc failed");
4742 return;
4743 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004744 }
4745
4746 if (ctxt->myDoc->intSubset == NULL)
4747 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4748 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004749 xmlSAX2EntityDecl(ctxt, name,
4750 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4751 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004752 }
Owen Taylor3473f882001-02-23 17:55:21 +00004753 }
4754 }
4755 }
4756 SKIP_BLANKS;
4757 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004758 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004759 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 } else {
4761 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4763 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004764 }
4765 NEXT;
4766 }
4767 if (orig != NULL) {
4768 /*
4769 * Ugly mechanism to save the raw entity value.
4770 */
4771 xmlEntityPtr cur = NULL;
4772
4773 if (isParameter) {
4774 if ((ctxt->sax != NULL) &&
4775 (ctxt->sax->getParameterEntity != NULL))
4776 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4777 } else {
4778 if ((ctxt->sax != NULL) &&
4779 (ctxt->sax->getEntity != NULL))
4780 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004781 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004782 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004783 }
Owen Taylor3473f882001-02-23 17:55:21 +00004784 }
4785 if (cur != NULL) {
4786 if (cur->orig != NULL)
4787 xmlFree(orig);
4788 else
4789 cur->orig = orig;
4790 } else
4791 xmlFree(orig);
4792 }
Owen Taylor3473f882001-02-23 17:55:21 +00004793 if (value != NULL) xmlFree(value);
4794 if (URI != NULL) xmlFree(URI);
4795 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
4797}
4798
4799/**
4800 * xmlParseDefaultDecl:
4801 * @ctxt: an XML parser context
4802 * @value: Receive a possible fixed default value for the attribute
4803 *
4804 * Parse an attribute default declaration
4805 *
4806 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4807 *
4808 * [ VC: Required Attribute ]
4809 * if the default declaration is the keyword #REQUIRED, then the
4810 * attribute must be specified for all elements of the type in the
4811 * attribute-list declaration.
4812 *
4813 * [ VC: Attribute Default Legal ]
4814 * The declared default value must meet the lexical constraints of
4815 * the declared attribute type c.f. xmlValidateAttributeDecl()
4816 *
4817 * [ VC: Fixed Attribute Default ]
4818 * if an attribute has a default value declared with the #FIXED
4819 * keyword, instances of that attribute must match the default value.
4820 *
4821 * [ WFC: No < in Attribute Values ]
4822 * handled in xmlParseAttValue()
4823 *
4824 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4825 * or XML_ATTRIBUTE_FIXED.
4826 */
4827
4828int
4829xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4830 int val;
4831 xmlChar *ret;
4832
4833 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004834 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004835 SKIP(9);
4836 return(XML_ATTRIBUTE_REQUIRED);
4837 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004838 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004839 SKIP(8);
4840 return(XML_ATTRIBUTE_IMPLIED);
4841 }
4842 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004843 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 SKIP(6);
4845 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 }
4850 SKIP_BLANKS;
4851 }
4852 ret = xmlParseAttValue(ctxt);
4853 ctxt->instate = XML_PARSER_DTD;
4854 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004855 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004856 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 } else
4858 *value = ret;
4859 return(val);
4860}
4861
4862/**
4863 * xmlParseNotationType:
4864 * @ctxt: an XML parser context
4865 *
4866 * parse an Notation attribute type.
4867 *
4868 * Note: the leading 'NOTATION' S part has already being parsed...
4869 *
4870 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4871 *
4872 * [ VC: Notation Attributes ]
4873 * Values of this type must match one of the notation names included
4874 * in the declaration; all notation names in the declaration must be declared.
4875 *
4876 * Returns: the notation attribute tree built while parsing
4877 */
4878
4879xmlEnumerationPtr
4880xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004881 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004882 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4883
4884 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004885 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(NULL);
4887 }
4888 SHRINK;
4889 do {
4890 NEXT;
4891 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004892 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004893 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004894 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4895 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004896 return(ret);
4897 }
4898 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 if (cur == NULL) return(ret);
4900 if (last == NULL) ret = last = cur;
4901 else {
4902 last->next = cur;
4903 last = cur;
4904 }
4905 SKIP_BLANKS;
4906 } while (RAW == '|');
4907 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004908 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004909 if ((last != NULL) && (last != ret))
4910 xmlFreeEnumeration(last);
4911 return(ret);
4912 }
4913 NEXT;
4914 return(ret);
4915}
4916
4917/**
4918 * xmlParseEnumerationType:
4919 * @ctxt: an XML parser context
4920 *
4921 * parse an Enumeration attribute type.
4922 *
4923 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4924 *
4925 * [ VC: Enumeration ]
4926 * Values of this type must match one of the Nmtoken tokens in
4927 * the declaration
4928 *
4929 * Returns: the enumeration attribute tree built while parsing
4930 */
4931
4932xmlEnumerationPtr
4933xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4934 xmlChar *name;
4935 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4936
4937 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004938 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004939 return(NULL);
4940 }
4941 SHRINK;
4942 do {
4943 NEXT;
4944 SKIP_BLANKS;
4945 name = xmlParseNmtoken(ctxt);
4946 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004947 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004948 return(ret);
4949 }
4950 cur = xmlCreateEnumeration(name);
4951 xmlFree(name);
4952 if (cur == NULL) return(ret);
4953 if (last == NULL) ret = last = cur;
4954 else {
4955 last->next = cur;
4956 last = cur;
4957 }
4958 SKIP_BLANKS;
4959 } while (RAW == '|');
4960 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004961 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004962 return(ret);
4963 }
4964 NEXT;
4965 return(ret);
4966}
4967
4968/**
4969 * xmlParseEnumeratedType:
4970 * @ctxt: an XML parser context
4971 * @tree: the enumeration tree built while parsing
4972 *
4973 * parse an Enumerated attribute type.
4974 *
4975 * [57] EnumeratedType ::= NotationType | Enumeration
4976 *
4977 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4978 *
4979 *
4980 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4981 */
4982
4983int
4984xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004985 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004986 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004987 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004988 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4989 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004990 return(0);
4991 }
4992 SKIP_BLANKS;
4993 *tree = xmlParseNotationType(ctxt);
4994 if (*tree == NULL) return(0);
4995 return(XML_ATTRIBUTE_NOTATION);
4996 }
4997 *tree = xmlParseEnumerationType(ctxt);
4998 if (*tree == NULL) return(0);
4999 return(XML_ATTRIBUTE_ENUMERATION);
5000}
5001
5002/**
5003 * xmlParseAttributeType:
5004 * @ctxt: an XML parser context
5005 * @tree: the enumeration tree built while parsing
5006 *
5007 * parse the Attribute list def for an element
5008 *
5009 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5010 *
5011 * [55] StringType ::= 'CDATA'
5012 *
5013 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5014 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5015 *
5016 * Validity constraints for attribute values syntax are checked in
5017 * xmlValidateAttributeValue()
5018 *
5019 * [ VC: ID ]
5020 * Values of type ID must match the Name production. A name must not
5021 * appear more than once in an XML document as a value of this type;
5022 * i.e., ID values must uniquely identify the elements which bear them.
5023 *
5024 * [ VC: One ID per Element Type ]
5025 * No element type may have more than one ID attribute specified.
5026 *
5027 * [ VC: ID Attribute Default ]
5028 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5029 *
5030 * [ VC: IDREF ]
5031 * Values of type IDREF must match the Name production, and values
5032 * of type IDREFS must match Names; each IDREF Name must match the value
5033 * of an ID attribute on some element in the XML document; i.e. IDREF
5034 * values must match the value of some ID attribute.
5035 *
5036 * [ VC: Entity Name ]
5037 * Values of type ENTITY must match the Name production, values
5038 * of type ENTITIES must match Names; each Entity Name must match the
5039 * name of an unparsed entity declared in the DTD.
5040 *
5041 * [ VC: Name Token ]
5042 * Values of type NMTOKEN must match the Nmtoken production; values
5043 * of type NMTOKENS must match Nmtokens.
5044 *
5045 * Returns the attribute type
5046 */
5047int
5048xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5049 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005050 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005051 SKIP(5);
5052 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005053 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005054 SKIP(6);
5055 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005056 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005057 SKIP(5);
5058 return(XML_ATTRIBUTE_IDREF);
5059 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5060 SKIP(2);
5061 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005062 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005063 SKIP(6);
5064 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005065 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005066 SKIP(8);
5067 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005068 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005069 SKIP(8);
5070 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005071 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005072 SKIP(7);
5073 return(XML_ATTRIBUTE_NMTOKEN);
5074 }
5075 return(xmlParseEnumeratedType(ctxt, tree));
5076}
5077
5078/**
5079 * xmlParseAttributeListDecl:
5080 * @ctxt: an XML parser context
5081 *
5082 * : parse the Attribute list def for an element
5083 *
5084 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5085 *
5086 * [53] AttDef ::= S Name S AttType S DefaultDecl
5087 *
5088 */
5089void
5090xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005091 const xmlChar *elemName;
5092 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005093 xmlEnumerationPtr tree;
5094
Daniel Veillarda07050d2003-10-19 14:46:32 +00005095 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005096 xmlParserInputPtr input = ctxt->input;
5097
5098 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005099 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005101 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
5103 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005104 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005105 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005106 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5107 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005108 return;
5109 }
5110 SKIP_BLANKS;
5111 GROW;
5112 while (RAW != '>') {
5113 const xmlChar *check = CUR_PTR;
5114 int type;
5115 int def;
5116 xmlChar *defaultValue = NULL;
5117
5118 GROW;
5119 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005120 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005121 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5123 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005124 break;
5125 }
5126 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005127 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005129 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005130 break;
5131 }
5132 SKIP_BLANKS;
5133
5134 type = xmlParseAttributeType(ctxt, &tree);
5135 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005136 break;
5137 }
5138
5139 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005140 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005141 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5142 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005143 if (tree != NULL)
5144 xmlFreeEnumeration(tree);
5145 break;
5146 }
5147 SKIP_BLANKS;
5148
5149 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5150 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005151 if (defaultValue != NULL)
5152 xmlFree(defaultValue);
5153 if (tree != NULL)
5154 xmlFreeEnumeration(tree);
5155 break;
5156 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005157 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5158 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005159
5160 GROW;
5161 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005162 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005163 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005164 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005165 if (defaultValue != NULL)
5166 xmlFree(defaultValue);
5167 if (tree != NULL)
5168 xmlFreeEnumeration(tree);
5169 break;
5170 }
5171 SKIP_BLANKS;
5172 }
5173 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005174 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5175 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005176 if (defaultValue != NULL)
5177 xmlFree(defaultValue);
5178 if (tree != NULL)
5179 xmlFreeEnumeration(tree);
5180 break;
5181 }
5182 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5183 (ctxt->sax->attributeDecl != NULL))
5184 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5185 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005186 else if (tree != NULL)
5187 xmlFreeEnumeration(tree);
5188
5189 if ((ctxt->sax2) && (defaultValue != NULL) &&
5190 (def != XML_ATTRIBUTE_IMPLIED) &&
5191 (def != XML_ATTRIBUTE_REQUIRED)) {
5192 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5193 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005194 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005195 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5196 }
Owen Taylor3473f882001-02-23 17:55:21 +00005197 if (defaultValue != NULL)
5198 xmlFree(defaultValue);
5199 GROW;
5200 }
5201 if (RAW == '>') {
5202 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005203 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5204 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005205 }
5206 NEXT;
5207 }
Owen Taylor3473f882001-02-23 17:55:21 +00005208 }
5209}
5210
5211/**
5212 * xmlParseElementMixedContentDecl:
5213 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005214 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005215 *
5216 * parse the declaration for a Mixed Element content
5217 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5218 *
5219 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5220 * '(' S? '#PCDATA' S? ')'
5221 *
5222 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5223 *
5224 * [ VC: No Duplicate Types ]
5225 * The same name must not appear more than once in a single
5226 * mixed-content declaration.
5227 *
5228 * returns: the list of the xmlElementContentPtr describing the element choices
5229 */
5230xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005231xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005232 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005233 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005234
5235 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005236 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005237 SKIP(7);
5238 SKIP_BLANKS;
5239 SHRINK;
5240 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005241 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005242 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5243"Element content declaration doesn't start and stop in the same entity\n",
5244 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005245 }
Owen Taylor3473f882001-02-23 17:55:21 +00005246 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005247 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005248 if (ret == NULL)
5249 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 if (RAW == '*') {
5251 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5252 NEXT;
5253 }
5254 return(ret);
5255 }
5256 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005257 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005258 if (ret == NULL) return(NULL);
5259 }
5260 while (RAW == '|') {
5261 NEXT;
5262 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005263 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005264 if (ret == NULL) return(NULL);
5265 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005266 if (cur != NULL)
5267 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 cur = ret;
5269 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005272 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005273 if (n->c1 != NULL)
5274 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005275 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005276 if (n != NULL)
5277 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005278 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005281 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005283 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005284 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005285 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 return(NULL);
5287 }
5288 SKIP_BLANKS;
5289 GROW;
5290 }
5291 if ((RAW == ')') && (NXT(1) == '*')) {
5292 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005293 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005294 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005295 if (cur->c2 != NULL)
5296 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005297 }
5298 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005300 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5301"Element content declaration doesn't start and stop in the same entity\n",
5302 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005303 }
Owen Taylor3473f882001-02-23 17:55:21 +00005304 SKIP(2);
5305 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005306 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 return(NULL);
5309 }
5310
5311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005312 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005313 }
5314 return(ret);
5315}
5316
5317/**
5318 * xmlParseElementChildrenContentDecl:
5319 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005320 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005321 *
5322 * parse the declaration for an Element children content
5323 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5324 *
5325 *
5326 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5327 *
5328 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5329 *
5330 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5331 *
5332 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5333 *
5334 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5335 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005336 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005337 * opening or closing parentheses in a choice, seq, or Mixed
5338 * construct is contained in the replacement text for a parameter
5339 * entity, both must be contained in the same replacement text. For
5340 * interoperability, if a parameter-entity reference appears in a
5341 * choice, seq, or Mixed construct, its replacement text should not
5342 * be empty, and neither the first nor last non-blank character of
5343 * the replacement text should be a connector (| or ,).
5344 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005345 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005346 * hierarchy.
5347 */
5348xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005349xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005350 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005351 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005352 xmlChar type = 0;
5353
5354 SKIP_BLANKS;
5355 GROW;
5356 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005358
Owen Taylor3473f882001-02-23 17:55:21 +00005359 /* Recurse on first child */
5360 NEXT;
5361 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005362 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005363 SKIP_BLANKS;
5364 GROW;
5365 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005366 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005367 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005368 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005369 return(NULL);
5370 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005371 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005372 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005373 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005374 return(NULL);
5375 }
Owen Taylor3473f882001-02-23 17:55:21 +00005376 GROW;
5377 if (RAW == '?') {
5378 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5379 NEXT;
5380 } else if (RAW == '*') {
5381 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5382 NEXT;
5383 } else if (RAW == '+') {
5384 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5385 NEXT;
5386 } else {
5387 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5388 }
Owen Taylor3473f882001-02-23 17:55:21 +00005389 GROW;
5390 }
5391 SKIP_BLANKS;
5392 SHRINK;
5393 while (RAW != ')') {
5394 /*
5395 * Each loop we parse one separator and one element.
5396 */
5397 if (RAW == ',') {
5398 if (type == 0) type = CUR;
5399
5400 /*
5401 * Detect "Name | Name , Name" error
5402 */
5403 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005404 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005405 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005406 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005407 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005408 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005410 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005411 return(NULL);
5412 }
5413 NEXT;
5414
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005415 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005416 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005417 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005418 xmlFreeDocElementContent(ctxt->myDoc, last);
5419 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 return(NULL);
5421 }
5422 if (last == NULL) {
5423 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005424 if (ret != NULL)
5425 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005426 ret = cur = op;
5427 } else {
5428 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005429 if (op != NULL)
5430 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005431 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005432 if (last != NULL)
5433 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005434 cur =op;
5435 last = NULL;
5436 }
5437 } else if (RAW == '|') {
5438 if (type == 0) type = CUR;
5439
5440 /*
5441 * Detect "Name , Name | Name" error
5442 */
5443 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005444 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005445 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005446 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005447 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005448 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005449 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005450 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 return(NULL);
5452 }
5453 NEXT;
5454
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005455 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005457 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005458 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005460 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005461 return(NULL);
5462 }
5463 if (last == NULL) {
5464 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005465 if (ret != NULL)
5466 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005467 ret = cur = op;
5468 } else {
5469 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005470 if (op != NULL)
5471 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005473 if (last != NULL)
5474 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005475 cur =op;
5476 last = NULL;
5477 }
5478 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005479 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005480 if ((last != NULL) && (last != ret))
5481 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005482 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005483 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 return(NULL);
5485 }
5486 GROW;
5487 SKIP_BLANKS;
5488 GROW;
5489 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005490 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005491 /* Recurse on second child */
5492 NEXT;
5493 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005494 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005495 SKIP_BLANKS;
5496 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005497 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005499 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005500 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005501 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005502 return(NULL);
5503 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005504 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005505 if (last == NULL) {
5506 if (ret != NULL)
5507 xmlFreeDocElementContent(ctxt->myDoc, ret);
5508 return(NULL);
5509 }
Owen Taylor3473f882001-02-23 17:55:21 +00005510 if (RAW == '?') {
5511 last->ocur = XML_ELEMENT_CONTENT_OPT;
5512 NEXT;
5513 } else if (RAW == '*') {
5514 last->ocur = XML_ELEMENT_CONTENT_MULT;
5515 NEXT;
5516 } else if (RAW == '+') {
5517 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5518 NEXT;
5519 } else {
5520 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5521 }
5522 }
5523 SKIP_BLANKS;
5524 GROW;
5525 }
5526 if ((cur != NULL) && (last != NULL)) {
5527 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005528 if (last != NULL)
5529 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005530 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005531 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005532 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5533"Element content declaration doesn't start and stop in the same entity\n",
5534 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005535 }
Owen Taylor3473f882001-02-23 17:55:21 +00005536 NEXT;
5537 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005538 if (ret != NULL) {
5539 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5540 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5541 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5542 else
5543 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5544 }
Owen Taylor3473f882001-02-23 17:55:21 +00005545 NEXT;
5546 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005547 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005548 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005549 cur = ret;
5550 /*
5551 * Some normalization:
5552 * (a | b* | c?)* == (a | b | c)*
5553 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005554 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005555 if ((cur->c1 != NULL) &&
5556 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5557 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5558 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5559 if ((cur->c2 != NULL) &&
5560 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5561 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5562 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5563 cur = cur->c2;
5564 }
5565 }
Owen Taylor3473f882001-02-23 17:55:21 +00005566 NEXT;
5567 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005568 if (ret != NULL) {
5569 int found = 0;
5570
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005571 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5572 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005574 else
5575 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005576 /*
5577 * Some normalization:
5578 * (a | b*)+ == (a | b)*
5579 * (a | b?)+ == (a | b)*
5580 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005581 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005582 if ((cur->c1 != NULL) &&
5583 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5584 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5585 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5586 found = 1;
5587 }
5588 if ((cur->c2 != NULL) &&
5589 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5590 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5591 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5592 found = 1;
5593 }
5594 cur = cur->c2;
5595 }
5596 if (found)
5597 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5598 }
Owen Taylor3473f882001-02-23 17:55:21 +00005599 NEXT;
5600 }
5601 return(ret);
5602}
5603
5604/**
5605 * xmlParseElementContentDecl:
5606 * @ctxt: an XML parser context
5607 * @name: the name of the element being defined.
5608 * @result: the Element Content pointer will be stored here if any
5609 *
5610 * parse the declaration for an Element content either Mixed or Children,
5611 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5612 *
5613 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5614 *
5615 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5616 */
5617
5618int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005619xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005620 xmlElementContentPtr *result) {
5621
5622 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005623 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005624 int res;
5625
5626 *result = NULL;
5627
5628 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005629 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005630 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005631 return(-1);
5632 }
5633 NEXT;
5634 GROW;
5635 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005636 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005637 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005638 res = XML_ELEMENT_TYPE_MIXED;
5639 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005640 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005641 res = XML_ELEMENT_TYPE_ELEMENT;
5642 }
Owen Taylor3473f882001-02-23 17:55:21 +00005643 SKIP_BLANKS;
5644 *result = tree;
5645 return(res);
5646}
5647
5648/**
5649 * xmlParseElementDecl:
5650 * @ctxt: an XML parser context
5651 *
5652 * parse an Element declaration.
5653 *
5654 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5655 *
5656 * [ VC: Unique Element Type Declaration ]
5657 * No element type may be declared more than once
5658 *
5659 * Returns the type of the element, or -1 in case of error
5660 */
5661int
5662xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005663 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005664 int ret = -1;
5665 xmlElementContentPtr content = NULL;
5666
Daniel Veillard4c778d82005-01-23 17:37:44 +00005667 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005668 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005669 xmlParserInputPtr input = ctxt->input;
5670
5671 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005672 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005675 }
5676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005677 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005679 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5680 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005681 return(-1);
5682 }
5683 while ((RAW == 0) && (ctxt->inputNr > 1))
5684 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005685 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5687 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005688 }
5689 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(5);
5692 /*
5693 * Element must always be empty.
5694 */
5695 ret = XML_ELEMENT_TYPE_EMPTY;
5696 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5697 (NXT(2) == 'Y')) {
5698 SKIP(3);
5699 /*
5700 * Element is a generic container.
5701 */
5702 ret = XML_ELEMENT_TYPE_ANY;
5703 } else if (RAW == '(') {
5704 ret = xmlParseElementContentDecl(ctxt, name, &content);
5705 } else {
5706 /*
5707 * [ WFC: PEs in Internal Subset ] error handling.
5708 */
5709 if ((RAW == '%') && (ctxt->external == 0) &&
5710 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005711 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005712 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005713 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005714 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005715 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5716 }
Owen Taylor3473f882001-02-23 17:55:21 +00005717 return(-1);
5718 }
5719
5720 SKIP_BLANKS;
5721 /*
5722 * Pop-up of finished entities.
5723 */
5724 while ((RAW == 0) && (ctxt->inputNr > 1))
5725 xmlPopInput(ctxt);
5726 SKIP_BLANKS;
5727
5728 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005729 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005730 if (content != NULL) {
5731 xmlFreeDocElementContent(ctxt->myDoc, content);
5732 }
Owen Taylor3473f882001-02-23 17:55:21 +00005733 } else {
5734 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005735 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5736 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005737 }
5738
5739 NEXT;
5740 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005741 (ctxt->sax->elementDecl != NULL)) {
5742 if (content != NULL)
5743 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5745 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005746 if ((content != NULL) && (content->parent == NULL)) {
5747 /*
5748 * this is a trick: if xmlAddElementDecl is called,
5749 * instead of copying the full tree it is plugged directly
5750 * if called from the parser. Avoid duplicating the
5751 * interfaces or change the API/ABI
5752 */
5753 xmlFreeDocElementContent(ctxt->myDoc, content);
5754 }
5755 } else if (content != NULL) {
5756 xmlFreeDocElementContent(ctxt->myDoc, content);
5757 }
Owen Taylor3473f882001-02-23 17:55:21 +00005758 }
Owen Taylor3473f882001-02-23 17:55:21 +00005759 }
5760 return(ret);
5761}
5762
5763/**
Owen Taylor3473f882001-02-23 17:55:21 +00005764 * xmlParseConditionalSections
5765 * @ctxt: an XML parser context
5766 *
5767 * [61] conditionalSect ::= includeSect | ignoreSect
5768 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5769 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5770 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5771 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5772 */
5773
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005774static void
Owen Taylor3473f882001-02-23 17:55:21 +00005775xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5776 SKIP(3);
5777 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005778 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005779 SKIP(7);
5780 SKIP_BLANKS;
5781 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005782 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 } else {
5784 NEXT;
5785 }
5786 if (xmlParserDebugEntities) {
5787 if ((ctxt->input != NULL) && (ctxt->input->filename))
5788 xmlGenericError(xmlGenericErrorContext,
5789 "%s(%d): ", ctxt->input->filename,
5790 ctxt->input->line);
5791 xmlGenericError(xmlGenericErrorContext,
5792 "Entering INCLUDE Conditional Section\n");
5793 }
5794
5795 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5796 (NXT(2) != '>'))) {
5797 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005798 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005799
5800 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5801 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005802 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005803 NEXT;
5804 } else if (RAW == '%') {
5805 xmlParsePEReference(ctxt);
5806 } else
5807 xmlParseMarkupDecl(ctxt);
5808
5809 /*
5810 * Pop-up of finished entities.
5811 */
5812 while ((RAW == 0) && (ctxt->inputNr > 1))
5813 xmlPopInput(ctxt);
5814
Daniel Veillardfdc91562002-07-01 21:52:03 +00005815 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005816 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005817 break;
5818 }
5819 }
5820 if (xmlParserDebugEntities) {
5821 if ((ctxt->input != NULL) && (ctxt->input->filename))
5822 xmlGenericError(xmlGenericErrorContext,
5823 "%s(%d): ", ctxt->input->filename,
5824 ctxt->input->line);
5825 xmlGenericError(xmlGenericErrorContext,
5826 "Leaving INCLUDE Conditional Section\n");
5827 }
5828
Daniel Veillarda07050d2003-10-19 14:46:32 +00005829 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005830 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005831 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005832 int depth = 0;
5833
5834 SKIP(6);
5835 SKIP_BLANKS;
5836 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005837 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005838 } else {
5839 NEXT;
5840 }
5841 if (xmlParserDebugEntities) {
5842 if ((ctxt->input != NULL) && (ctxt->input->filename))
5843 xmlGenericError(xmlGenericErrorContext,
5844 "%s(%d): ", ctxt->input->filename,
5845 ctxt->input->line);
5846 xmlGenericError(xmlGenericErrorContext,
5847 "Entering IGNORE Conditional Section\n");
5848 }
5849
5850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005851 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005852 * But disable SAX event generating DTD building in the meantime
5853 */
5854 state = ctxt->disableSAX;
5855 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005857 ctxt->instate = XML_PARSER_IGNORE;
5858
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005859 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5861 depth++;
5862 SKIP(3);
5863 continue;
5864 }
5865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5866 if (--depth >= 0) SKIP(3);
5867 continue;
5868 }
5869 NEXT;
5870 continue;
5871 }
5872
5873 ctxt->disableSAX = state;
5874 ctxt->instate = instate;
5875
5876 if (xmlParserDebugEntities) {
5877 if ((ctxt->input != NULL) && (ctxt->input->filename))
5878 xmlGenericError(xmlGenericErrorContext,
5879 "%s(%d): ", ctxt->input->filename,
5880 ctxt->input->line);
5881 xmlGenericError(xmlGenericErrorContext,
5882 "Leaving IGNORE Conditional Section\n");
5883 }
5884
5885 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005887 }
5888
5889 if (RAW == 0)
5890 SHRINK;
5891
5892 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005893 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005894 } else {
5895 SKIP(3);
5896 }
5897}
5898
5899/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005900 * xmlParseMarkupDecl:
5901 * @ctxt: an XML parser context
5902 *
5903 * parse Markup declarations
5904 *
5905 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5906 * NotationDecl | PI | Comment
5907 *
5908 * [ VC: Proper Declaration/PE Nesting ]
5909 * Parameter-entity replacement text must be properly nested with
5910 * markup declarations. That is to say, if either the first character
5911 * or the last character of a markup declaration (markupdecl above) is
5912 * contained in the replacement text for a parameter-entity reference,
5913 * both must be contained in the same replacement text.
5914 *
5915 * [ WFC: PEs in Internal Subset ]
5916 * In the internal DTD subset, parameter-entity references can occur
5917 * only where markup declarations can occur, not within markup declarations.
5918 * (This does not apply to references that occur in external parameter
5919 * entities or to the external subset.)
5920 */
5921void
5922xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5923 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005924 if (CUR == '<') {
5925 if (NXT(1) == '!') {
5926 switch (NXT(2)) {
5927 case 'E':
5928 if (NXT(3) == 'L')
5929 xmlParseElementDecl(ctxt);
5930 else if (NXT(3) == 'N')
5931 xmlParseEntityDecl(ctxt);
5932 break;
5933 case 'A':
5934 xmlParseAttributeListDecl(ctxt);
5935 break;
5936 case 'N':
5937 xmlParseNotationDecl(ctxt);
5938 break;
5939 case '-':
5940 xmlParseComment(ctxt);
5941 break;
5942 default:
5943 /* there is an error but it will be detected later */
5944 break;
5945 }
5946 } else if (NXT(1) == '?') {
5947 xmlParsePI(ctxt);
5948 }
5949 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005950 /*
5951 * This is only for internal subset. On external entities,
5952 * the replacement is done before parsing stage
5953 */
5954 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5955 xmlParsePEReference(ctxt);
5956
5957 /*
5958 * Conditional sections are allowed from entities included
5959 * by PE References in the internal subset.
5960 */
5961 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5962 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5963 xmlParseConditionalSections(ctxt);
5964 }
5965 }
5966
5967 ctxt->instate = XML_PARSER_DTD;
5968}
5969
5970/**
5971 * xmlParseTextDecl:
5972 * @ctxt: an XML parser context
5973 *
5974 * parse an XML declaration header for external entities
5975 *
5976 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5977 *
5978 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5979 */
5980
5981void
5982xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5983 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005984 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005985
5986 /*
5987 * We know that '<?xml' is here.
5988 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005989 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005990 SKIP(5);
5991 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005992 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005993 return;
5994 }
5995
William M. Brack76e95df2003-10-18 16:20:14 +00005996 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5998 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005999 }
6000 SKIP_BLANKS;
6001
6002 /*
6003 * We may have the VersionInfo here.
6004 */
6005 version = xmlParseVersionInfo(ctxt);
6006 if (version == NULL)
6007 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006008 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006009 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6011 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006012 }
6013 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006014 ctxt->input->version = version;
6015
6016 /*
6017 * We must have the encoding declaration
6018 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006019 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6021 /*
6022 * The XML REC instructs us to stop parsing right here
6023 */
6024 return;
6025 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006026 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6027 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6028 "Missing encoding in text declaration\n");
6029 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006030
6031 SKIP_BLANKS;
6032 if ((RAW == '?') && (NXT(1) == '>')) {
6033 SKIP(2);
6034 } else if (RAW == '>') {
6035 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006036 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006037 NEXT;
6038 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006039 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006040 MOVETO_ENDTAG(CUR_PTR);
6041 NEXT;
6042 }
6043}
6044
6045/**
Owen Taylor3473f882001-02-23 17:55:21 +00006046 * xmlParseExternalSubset:
6047 * @ctxt: an XML parser context
6048 * @ExternalID: the external identifier
6049 * @SystemID: the system identifier (or URL)
6050 *
6051 * parse Markup declarations from an external subset
6052 *
6053 * [30] extSubset ::= textDecl? extSubsetDecl
6054 *
6055 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6056 */
6057void
6058xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6059 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006060 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006062 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006063 xmlParseTextDecl(ctxt);
6064 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6065 /*
6066 * The XML REC instructs us to stop parsing right here
6067 */
6068 ctxt->instate = XML_PARSER_EOF;
6069 return;
6070 }
6071 }
6072 if (ctxt->myDoc == NULL) {
6073 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6074 }
6075 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6076 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6077
6078 ctxt->instate = XML_PARSER_DTD;
6079 ctxt->external = 1;
6080 while (((RAW == '<') && (NXT(1) == '?')) ||
6081 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006082 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006083 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006084 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006085
6086 GROW;
6087 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6088 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006089 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006090 NEXT;
6091 } else if (RAW == '%') {
6092 xmlParsePEReference(ctxt);
6093 } else
6094 xmlParseMarkupDecl(ctxt);
6095
6096 /*
6097 * Pop-up of finished entities.
6098 */
6099 while ((RAW == 0) && (ctxt->inputNr > 1))
6100 xmlPopInput(ctxt);
6101
Daniel Veillardfdc91562002-07-01 21:52:03 +00006102 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006103 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006104 break;
6105 }
6106 }
6107
6108 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006109 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 }
6111
6112}
6113
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
6125void
6126xmlParseReference(xmlParserCtxtPtr ctxt) {
6127 xmlEntityPtr ent;
6128 xmlChar *val;
6129 if (RAW != '&') return;
6130
6131 if (NXT(1) == '#') {
6132 int i = 0;
6133 xmlChar out[10];
6134 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006135 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006136
Daniel Veillarddc171602008-03-26 17:41:38 +00006137 if (value == 0)
6138 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006139 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6140 /*
6141 * So we are using non-UTF-8 buffers
6142 * Check that the char fit on 8bits, if not
6143 * generate a CharRef.
6144 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006145 if (value <= 0xFF) {
6146 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006147 out[1] = 0;
6148 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6149 (!ctxt->disableSAX))
6150 ctxt->sax->characters(ctxt->userData, out, 1);
6151 } else {
6152 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006153 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006154 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006155 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6157 (!ctxt->disableSAX))
6158 ctxt->sax->reference(ctxt->userData, out);
6159 }
6160 } else {
6161 /*
6162 * Just encode the value in UTF-8
6163 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006164 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 out[i] = 0;
6166 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6167 (!ctxt->disableSAX))
6168 ctxt->sax->characters(ctxt->userData, out, i);
6169 }
6170 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006171 int was_checked;
6172
Owen Taylor3473f882001-02-23 17:55:21 +00006173 ent = xmlParseEntityRef(ctxt);
6174 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006175 if (!ctxt->wellFormed)
6176 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006177 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006178 if ((ent->name != NULL) &&
6179 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6180 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006181 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006182
6183
6184 /*
6185 * The first reference to the entity trigger a parsing phase
6186 * where the ent->children is filled with the result from
6187 * the parsing.
6188 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006189 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006190 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006191
Owen Taylor3473f882001-02-23 17:55:21 +00006192 value = ent->content;
6193
6194 /*
6195 * Check that this entity is well formed
6196 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006197 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006198 (value[1] == 0) && (value[0] == '<') &&
6199 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6200 /*
6201 * DONE: get definite answer on this !!!
6202 * Lots of entity decls are used to declare a single
6203 * char
6204 * <!ENTITY lt "<">
6205 * Which seems to be valid since
6206 * 2.4: The ampersand character (&) and the left angle
6207 * bracket (<) may appear in their literal form only
6208 * when used ... They are also legal within the literal
6209 * entity value of an internal entity declaration;i
6210 * see "4.3.2 Well-Formed Parsed Entities".
6211 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6212 * Looking at the OASIS test suite and James Clark
6213 * tests, this is broken. However the XML REC uses
6214 * it. Is the XML REC not well-formed ????
6215 * This is a hack to avoid this problem
6216 *
6217 * ANSWER: since lt gt amp .. are already defined,
6218 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006219 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006220 * is lousy but acceptable.
6221 */
6222 list = xmlNewDocText(ctxt->myDoc, value);
6223 if (list != NULL) {
6224 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6225 (ent->children == NULL)) {
6226 ent->children = list;
6227 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006228 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006229 list->parent = (xmlNodePtr) ent;
6230 } else {
6231 xmlFreeNodeList(list);
6232 }
6233 } else if (list != NULL) {
6234 xmlFreeNodeList(list);
6235 }
6236 } else {
6237 /*
6238 * 4.3.2: An internal general parsed entity is well-formed
6239 * if its replacement text matches the production labeled
6240 * content.
6241 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006242
6243 void *user_data;
6244 /*
6245 * This is a bit hackish but this seems the best
6246 * way to make sure both SAX and DOM entity support
6247 * behaves okay.
6248 */
6249 if (ctxt->userData == ctxt)
6250 user_data = NULL;
6251 else
6252 user_data = ctxt->userData;
6253
Owen Taylor3473f882001-02-23 17:55:21 +00006254 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6255 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006256 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6257 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006258 ctxt->depth--;
6259 } else if (ent->etype ==
6260 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6261 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006262 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006263 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006264 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006265 ctxt->depth--;
6266 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006267 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006268 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6269 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006270 }
6271 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006272 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006273 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006274 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006275 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6276 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006277 (ent->children == NULL)) {
6278 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006279 if (ctxt->replaceEntities) {
6280 /*
6281 * Prune it directly in the generated document
6282 * except for single text nodes.
6283 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006284 if (((list->type == XML_TEXT_NODE) &&
6285 (list->next == NULL)) ||
6286 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006287 list->parent = (xmlNodePtr) ent;
6288 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006289 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006290 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006291 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006292 while (list != NULL) {
6293 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006294 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006295 if (list->next == NULL)
6296 ent->last = list;
6297 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006298 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006299 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006300#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006301 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6302 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006303#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006304 }
6305 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006306 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006307 while (list != NULL) {
6308 list->parent = (xmlNodePtr) ent;
6309 if (list->next == NULL)
6310 ent->last = list;
6311 list = list->next;
6312 }
Owen Taylor3473f882001-02-23 17:55:21 +00006313 }
6314 } else {
6315 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006316 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006317 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006318 } else if ((ret != XML_ERR_OK) &&
6319 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006320 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6321 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006322 } else if (list != NULL) {
6323 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006324 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006325 }
6326 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006327 ent->checked = 1;
6328 }
6329
6330 if (ent->children == NULL) {
6331 /*
6332 * Probably running in SAX mode and the callbacks don't
6333 * build the entity content. So unless we already went
6334 * though parsing for first checking go though the entity
6335 * content to generate callbacks associated to the entity
6336 */
6337 if (was_checked == 1) {
6338 void *user_data;
6339 /*
6340 * This is a bit hackish but this seems the best
6341 * way to make sure both SAX and DOM entity support
6342 * behaves okay.
6343 */
6344 if (ctxt->userData == ctxt)
6345 user_data = NULL;
6346 else
6347 user_data = ctxt->userData;
6348
6349 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6350 ctxt->depth++;
6351 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6352 ent->content, user_data, NULL);
6353 ctxt->depth--;
6354 } else if (ent->etype ==
6355 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6356 ctxt->depth++;
6357 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6358 ctxt->sax, user_data, ctxt->depth,
6359 ent->URI, ent->ExternalID, NULL);
6360 ctxt->depth--;
6361 } else {
6362 ret = XML_ERR_ENTITY_PE_INTERNAL;
6363 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6364 "invalid entity type found\n", NULL);
6365 }
6366 if (ret == XML_ERR_ENTITY_LOOP) {
6367 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6368 return;
6369 }
6370 }
6371 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6372 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6373 /*
6374 * Entity reference callback comes second, it's somewhat
6375 * superfluous but a compatibility to historical behaviour
6376 */
6377 ctxt->sax->reference(ctxt->userData, ent->name);
6378 }
6379 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006380 }
6381 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006382 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006383 /*
6384 * Create a node.
6385 */
6386 ctxt->sax->reference(ctxt->userData, ent->name);
6387 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006388 }
6389 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006390 /*
6391 * There is a problem on the handling of _private for entities
6392 * (bug 155816): Should we copy the content of the field from
6393 * the entity (possibly overwriting some value set by the user
6394 * when a copy is created), should we leave it alone, or should
6395 * we try to take care of different situations? The problem
6396 * is exacerbated by the usage of this field by the xmlReader.
6397 * To fix this bug, we look at _private on the created node
6398 * and, if it's NULL, we copy in whatever was in the entity.
6399 * If it's not NULL we leave it alone. This is somewhat of a
6400 * hack - maybe we should have further tests to determine
6401 * what to do.
6402 */
Owen Taylor3473f882001-02-23 17:55:21 +00006403 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6404 /*
6405 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006406 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006407 * In the first occurrence list contains the replacement.
6408 * progressive == 2 means we are operating on the Reader
6409 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006410 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006411 if (((list == NULL) && (ent->owner == 0)) ||
6412 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006413 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006414
6415 /*
6416 * when operating on a reader, the entities definitions
6417 * are always owning the entities subtree.
6418 if (ctxt->parseMode == XML_PARSE_READER)
6419 ent->owner = 1;
6420 */
6421
Daniel Veillard62f313b2001-07-04 19:49:14 +00006422 cur = ent->children;
6423 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006424 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006425 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006426 if (nw->_private == NULL)
6427 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006428 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006429 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006430 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006431 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006432 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006433 if (cur == ent->last) {
6434 /*
6435 * needed to detect some strange empty
6436 * node cases in the reader tests
6437 */
6438 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006439 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006440 (nw->type == XML_ELEMENT_NODE) &&
6441 (nw->children == NULL))
6442 nw->extra = 1;
6443
Daniel Veillard62f313b2001-07-04 19:49:14 +00006444 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006445 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006446 cur = cur->next;
6447 }
Daniel Veillard81273902003-09-30 00:43:48 +00006448#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006449 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006450 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006451#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006452 } else if (list == NULL) {
6453 xmlNodePtr nw = NULL, cur, next, last,
6454 firstChild = NULL;
6455 /*
6456 * Copy the entity child list and make it the new
6457 * entity child list. The goal is to make sure any
6458 * ID or REF referenced will be the one from the
6459 * document content and not the entity copy.
6460 */
6461 cur = ent->children;
6462 ent->children = NULL;
6463 last = ent->last;
6464 ent->last = NULL;
6465 while (cur != NULL) {
6466 next = cur->next;
6467 cur->next = NULL;
6468 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006469 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006470 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006471 if (nw->_private == NULL)
6472 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006473 if (firstChild == NULL){
6474 firstChild = cur;
6475 }
6476 xmlAddChild((xmlNodePtr) ent, nw);
6477 xmlAddChild(ctxt->node, cur);
6478 }
6479 if (cur == last)
6480 break;
6481 cur = next;
6482 }
6483 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006484#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006485 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6486 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006487#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006488 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006489 const xmlChar *nbktext;
6490
Daniel Veillard62f313b2001-07-04 19:49:14 +00006491 /*
6492 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006493 * node with a possible previous text one which
6494 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006495 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006496 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6497 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006498 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006499 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006500 if ((ent->last != ent->children) &&
6501 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006502 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006503 xmlAddChildList(ctxt->node, ent->children);
6504 }
6505
Owen Taylor3473f882001-02-23 17:55:21 +00006506 /*
6507 * This is to avoid a nasty side effect, see
6508 * characters() in SAX.c
6509 */
6510 ctxt->nodemem = 0;
6511 ctxt->nodelen = 0;
6512 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006513 }
6514 }
6515 } else {
6516 val = ent->content;
6517 if (val == NULL) return;
6518 /*
6519 * inline the entity.
6520 */
6521 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6522 (!ctxt->disableSAX))
6523 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6524 }
6525 }
6526}
6527
6528/**
6529 * xmlParseEntityRef:
6530 * @ctxt: an XML parser context
6531 *
6532 * parse ENTITY references declarations
6533 *
6534 * [68] EntityRef ::= '&' Name ';'
6535 *
6536 * [ WFC: Entity Declared ]
6537 * In a document without any DTD, a document with only an internal DTD
6538 * subset which contains no parameter entity references, or a document
6539 * with "standalone='yes'", the Name given in the entity reference
6540 * must match that in an entity declaration, except that well-formed
6541 * documents need not declare any of the following entities: amp, lt,
6542 * gt, apos, quot. The declaration of a parameter entity must precede
6543 * any reference to it. Similarly, the declaration of a general entity
6544 * must precede any reference to it which appears in a default value in an
6545 * attribute-list declaration. Note that if entities are declared in the
6546 * external subset or in external parameter entities, a non-validating
6547 * processor is not obligated to read and process their declarations;
6548 * for such documents, the rule that an entity must be declared is a
6549 * well-formedness constraint only if standalone='yes'.
6550 *
6551 * [ WFC: Parsed Entity ]
6552 * An entity reference must not contain the name of an unparsed entity
6553 *
6554 * Returns the xmlEntityPtr if found, or NULL otherwise.
6555 */
6556xmlEntityPtr
6557xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006558 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006559 xmlEntityPtr ent = NULL;
6560
6561 GROW;
6562
6563 if (RAW == '&') {
6564 NEXT;
6565 name = xmlParseName(ctxt);
6566 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006567 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6568 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006569 } else {
6570 if (RAW == ';') {
6571 NEXT;
6572 /*
6573 * Ask first SAX for entity resolution, otherwise try the
6574 * predefined set.
6575 */
6576 if (ctxt->sax != NULL) {
6577 if (ctxt->sax->getEntity != NULL)
6578 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006579 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006580 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006581 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6582 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006583 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006584 }
Owen Taylor3473f882001-02-23 17:55:21 +00006585 }
6586 /*
6587 * [ WFC: Entity Declared ]
6588 * In a document without any DTD, a document with only an
6589 * internal DTD subset which contains no parameter entity
6590 * references, or a document with "standalone='yes'", the
6591 * Name given in the entity reference must match that in an
6592 * entity declaration, except that well-formed documents
6593 * need not declare any of the following entities: amp, lt,
6594 * gt, apos, quot.
6595 * The declaration of a parameter entity must precede any
6596 * reference to it.
6597 * Similarly, the declaration of a general entity must
6598 * precede any reference to it which appears in a default
6599 * value in an attribute-list declaration. Note that if
6600 * entities are declared in the external subset or in
6601 * external parameter entities, a non-validating processor
6602 * is not obligated to read and process their declarations;
6603 * for such documents, the rule that an entity must be
6604 * declared is a well-formedness constraint only if
6605 * standalone='yes'.
6606 */
6607 if (ent == NULL) {
6608 if ((ctxt->standalone == 1) ||
6609 ((ctxt->hasExternalSubset == 0) &&
6610 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006611 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006612 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006613 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006614 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006615 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006616 if ((ctxt->inSubset == 0) &&
6617 (ctxt->sax != NULL) &&
6618 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006619 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006620 }
Owen Taylor3473f882001-02-23 17:55:21 +00006621 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006622 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006623 }
6624
6625 /*
6626 * [ WFC: Parsed Entity ]
6627 * An entity reference must not contain the name of an
6628 * unparsed entity
6629 */
6630 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006631 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006632 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006633 }
6634
6635 /*
6636 * [ WFC: No External Entity References ]
6637 * Attribute values cannot contain direct or indirect
6638 * entity references to external entities.
6639 */
6640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006642 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6643 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006644 }
6645 /*
6646 * [ WFC: No < in Attribute Values ]
6647 * The replacement text of any entity referred to directly or
6648 * indirectly in an attribute value (other than "&lt;") must
6649 * not contain a <.
6650 */
6651 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6652 (ent != NULL) &&
6653 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6654 (ent->content != NULL) &&
6655 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006656 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006657 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659
6660 /*
6661 * Internal check, no parameter entities here ...
6662 */
6663 else {
6664 switch (ent->etype) {
6665 case XML_INTERNAL_PARAMETER_ENTITY:
6666 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006667 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6668 "Attempt to reference the parameter entity '%s'\n",
6669 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006670 break;
6671 default:
6672 break;
6673 }
6674 }
6675
6676 /*
6677 * [ WFC: No Recursion ]
6678 * A parsed entity must not contain a recursive reference
6679 * to itself, either directly or indirectly.
6680 * Done somewhere else
6681 */
6682
6683 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006684 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 }
Owen Taylor3473f882001-02-23 17:55:21 +00006686 }
6687 }
6688 return(ent);
6689}
6690
6691/**
6692 * xmlParseStringEntityRef:
6693 * @ctxt: an XML parser context
6694 * @str: a pointer to an index in the string
6695 *
6696 * parse ENTITY references declarations, but this version parses it from
6697 * a string value.
6698 *
6699 * [68] EntityRef ::= '&' Name ';'
6700 *
6701 * [ WFC: Entity Declared ]
6702 * In a document without any DTD, a document with only an internal DTD
6703 * subset which contains no parameter entity references, or a document
6704 * with "standalone='yes'", the Name given in the entity reference
6705 * must match that in an entity declaration, except that well-formed
6706 * documents need not declare any of the following entities: amp, lt,
6707 * gt, apos, quot. The declaration of a parameter entity must precede
6708 * any reference to it. Similarly, the declaration of a general entity
6709 * must precede any reference to it which appears in a default value in an
6710 * attribute-list declaration. Note that if entities are declared in the
6711 * external subset or in external parameter entities, a non-validating
6712 * processor is not obligated to read and process their declarations;
6713 * for such documents, the rule that an entity must be declared is a
6714 * well-formedness constraint only if standalone='yes'.
6715 *
6716 * [ WFC: Parsed Entity ]
6717 * An entity reference must not contain the name of an unparsed entity
6718 *
6719 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6720 * is updated to the current location in the string.
6721 */
6722xmlEntityPtr
6723xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6724 xmlChar *name;
6725 const xmlChar *ptr;
6726 xmlChar cur;
6727 xmlEntityPtr ent = NULL;
6728
6729 if ((str == NULL) || (*str == NULL))
6730 return(NULL);
6731 ptr = *str;
6732 cur = *ptr;
6733 if (cur == '&') {
6734 ptr++;
6735 cur = *ptr;
6736 name = xmlParseStringName(ctxt, &ptr);
6737 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006738 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6739 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006740 } else {
6741 if (*ptr == ';') {
6742 ptr++;
6743 /*
6744 * Ask first SAX for entity resolution, otherwise try the
6745 * predefined set.
6746 */
6747 if (ctxt->sax != NULL) {
6748 if (ctxt->sax->getEntity != NULL)
6749 ent = ctxt->sax->getEntity(ctxt->userData, name);
6750 if (ent == NULL)
6751 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006752 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006753 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006754 }
Owen Taylor3473f882001-02-23 17:55:21 +00006755 }
6756 /*
6757 * [ WFC: Entity Declared ]
6758 * In a document without any DTD, a document with only an
6759 * internal DTD subset which contains no parameter entity
6760 * references, or a document with "standalone='yes'", the
6761 * Name given in the entity reference must match that in an
6762 * entity declaration, except that well-formed documents
6763 * need not declare any of the following entities: amp, lt,
6764 * gt, apos, quot.
6765 * The declaration of a parameter entity must precede any
6766 * reference to it.
6767 * Similarly, the declaration of a general entity must
6768 * precede any reference to it which appears in a default
6769 * value in an attribute-list declaration. Note that if
6770 * entities are declared in the external subset or in
6771 * external parameter entities, a non-validating processor
6772 * is not obligated to read and process their declarations;
6773 * for such documents, the rule that an entity must be
6774 * declared is a well-formedness constraint only if
6775 * standalone='yes'.
6776 */
6777 if (ent == NULL) {
6778 if ((ctxt->standalone == 1) ||
6779 ((ctxt->hasExternalSubset == 0) &&
6780 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006781 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006782 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006783 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006784 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006785 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006786 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006787 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006788 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006789 }
6790
6791 /*
6792 * [ WFC: Parsed Entity ]
6793 * An entity reference must not contain the name of an
6794 * unparsed entity
6795 */
6796 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006797 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006798 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006799 }
6800
6801 /*
6802 * [ WFC: No External Entity References ]
6803 * Attribute values cannot contain direct or indirect
6804 * entity references to external entities.
6805 */
6806 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6807 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006808 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006809 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006810 }
6811 /*
6812 * [ WFC: No < in Attribute Values ]
6813 * The replacement text of any entity referred to directly or
6814 * indirectly in an attribute value (other than "&lt;") must
6815 * not contain a <.
6816 */
6817 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6818 (ent != NULL) &&
6819 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6820 (ent->content != NULL) &&
6821 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006822 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6823 "'<' in entity '%s' is not allowed in attributes values\n",
6824 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006825 }
6826
6827 /*
6828 * Internal check, no parameter entities here ...
6829 */
6830 else {
6831 switch (ent->etype) {
6832 case XML_INTERNAL_PARAMETER_ENTITY:
6833 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006834 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6835 "Attempt to reference the parameter entity '%s'\n",
6836 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006837 break;
6838 default:
6839 break;
6840 }
6841 }
6842
6843 /*
6844 * [ WFC: No Recursion ]
6845 * A parsed entity must not contain a recursive reference
6846 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006847 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006848 */
6849
6850 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006851 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006852 }
6853 xmlFree(name);
6854 }
6855 }
6856 *str = ptr;
6857 return(ent);
6858}
6859
6860/**
6861 * xmlParsePEReference:
6862 * @ctxt: an XML parser context
6863 *
6864 * parse PEReference declarations
6865 * The entity content is handled directly by pushing it's content as
6866 * a new input stream.
6867 *
6868 * [69] PEReference ::= '%' Name ';'
6869 *
6870 * [ WFC: No Recursion ]
6871 * A parsed entity must not contain a recursive
6872 * reference to itself, either directly or indirectly.
6873 *
6874 * [ WFC: Entity Declared ]
6875 * In a document without any DTD, a document with only an internal DTD
6876 * subset which contains no parameter entity references, or a document
6877 * with "standalone='yes'", ... ... The declaration of a parameter
6878 * entity must precede any reference to it...
6879 *
6880 * [ VC: Entity Declared ]
6881 * In a document with an external subset or external parameter entities
6882 * with "standalone='no'", ... ... The declaration of a parameter entity
6883 * must precede any reference to it...
6884 *
6885 * [ WFC: In DTD ]
6886 * Parameter-entity references may only appear in the DTD.
6887 * NOTE: misleading but this is handled.
6888 */
6889void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006890xmlParsePEReference(xmlParserCtxtPtr ctxt)
6891{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006892 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006893 xmlEntityPtr entity = NULL;
6894 xmlParserInputPtr input;
6895
6896 if (RAW == '%') {
6897 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006898 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006899 if (name == NULL) {
6900 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6901 "xmlParsePEReference: no name\n");
6902 } else {
6903 if (RAW == ';') {
6904 NEXT;
6905 if ((ctxt->sax != NULL) &&
6906 (ctxt->sax->getParameterEntity != NULL))
6907 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6908 name);
6909 if (entity == NULL) {
6910 /*
6911 * [ WFC: Entity Declared ]
6912 * In a document without any DTD, a document with only an
6913 * internal DTD subset which contains no parameter entity
6914 * references, or a document with "standalone='yes'", ...
6915 * ... The declaration of a parameter entity must precede
6916 * any reference to it...
6917 */
6918 if ((ctxt->standalone == 1) ||
6919 ((ctxt->hasExternalSubset == 0) &&
6920 (ctxt->hasPErefs == 0))) {
6921 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6922 "PEReference: %%%s; not found\n",
6923 name);
6924 } else {
6925 /*
6926 * [ VC: Entity Declared ]
6927 * In a document with an external subset or external
6928 * parameter entities with "standalone='no'", ...
6929 * ... The declaration of a parameter entity must
6930 * precede any reference to it...
6931 */
6932 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6933 "PEReference: %%%s; not found\n",
6934 name, NULL);
6935 ctxt->valid = 0;
6936 }
6937 } else {
6938 /*
6939 * Internal checking in case the entity quest barfed
6940 */
6941 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6942 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6943 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6944 "Internal: %%%s; is not a parameter entity\n",
6945 name, NULL);
6946 } else if (ctxt->input->free != deallocblankswrapper) {
6947 input =
6948 xmlNewBlanksWrapperInputStream(ctxt, entity);
6949 xmlPushInput(ctxt, input);
6950 } else {
6951 /*
6952 * TODO !!!
6953 * handle the extra spaces added before and after
6954 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6955 */
6956 input = xmlNewEntityInputStream(ctxt, entity);
6957 xmlPushInput(ctxt, input);
6958 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006959 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006960 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006961 xmlParseTextDecl(ctxt);
6962 if (ctxt->errNo ==
6963 XML_ERR_UNSUPPORTED_ENCODING) {
6964 /*
6965 * The XML REC instructs us to stop parsing
6966 * right here
6967 */
6968 ctxt->instate = XML_PARSER_EOF;
6969 return;
6970 }
6971 }
6972 }
6973 }
6974 ctxt->hasPErefs = 1;
6975 } else {
6976 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6977 }
6978 }
Owen Taylor3473f882001-02-23 17:55:21 +00006979 }
6980}
6981
6982/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00006983 * xmlLoadEntityContent:
6984 * @ctxt: an XML parser context
6985 * @entity: an unloaded system entity
6986 *
6987 * Load the original content of the given system entity from the
6988 * ExternalID/SystemID given. This is to be used for Included in Literal
6989 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
6990 *
6991 * Returns 0 in case of success and -1 in case of failure
6992 */
6993static int
6994xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
6995 xmlParserInputPtr input;
6996 xmlBufferPtr buf;
6997 int l, c;
6998 int count = 0;
6999
7000 if ((ctxt == NULL) || (entity == NULL) ||
7001 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7002 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7003 (entity->content != NULL)) {
7004 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7005 "xmlLoadEntityContent parameter error");
7006 return(-1);
7007 }
7008
7009 if (xmlParserDebugEntities)
7010 xmlGenericError(xmlGenericErrorContext,
7011 "Reading %s entity content input\n", entity->name);
7012
7013 buf = xmlBufferCreate();
7014 if (buf == NULL) {
7015 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7016 "xmlLoadEntityContent parameter error");
7017 return(-1);
7018 }
7019
7020 input = xmlNewEntityInputStream(ctxt, entity);
7021 if (input == NULL) {
7022 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7023 "xmlLoadEntityContent input error");
7024 xmlBufferFree(buf);
7025 return(-1);
7026 }
7027
7028 /*
7029 * Push the entity as the current input, read char by char
7030 * saving to the buffer until the end of the entity or an error
7031 */
7032 xmlPushInput(ctxt, input);
7033 GROW;
7034 c = CUR_CHAR(l);
7035 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7036 (IS_CHAR(c))) {
7037 xmlBufferAdd(buf, ctxt->input->cur, l);
7038 if (count++ > 100) {
7039 count = 0;
7040 GROW;
7041 }
7042 NEXTL(l);
7043 c = CUR_CHAR(l);
7044 }
7045
7046 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7047 xmlPopInput(ctxt);
7048 } else if (!IS_CHAR(c)) {
7049 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7050 "xmlLoadEntityContent: invalid char value %d\n",
7051 c);
7052 xmlBufferFree(buf);
7053 return(-1);
7054 }
7055 entity->content = buf->content;
7056 buf->content = NULL;
7057 xmlBufferFree(buf);
7058
7059 return(0);
7060}
7061
7062/**
Owen Taylor3473f882001-02-23 17:55:21 +00007063 * xmlParseStringPEReference:
7064 * @ctxt: an XML parser context
7065 * @str: a pointer to an index in the string
7066 *
7067 * parse PEReference declarations
7068 *
7069 * [69] PEReference ::= '%' Name ';'
7070 *
7071 * [ WFC: No Recursion ]
7072 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007073 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007074 *
7075 * [ WFC: Entity Declared ]
7076 * In a document without any DTD, a document with only an internal DTD
7077 * subset which contains no parameter entity references, or a document
7078 * with "standalone='yes'", ... ... The declaration of a parameter
7079 * entity must precede any reference to it...
7080 *
7081 * [ VC: Entity Declared ]
7082 * In a document with an external subset or external parameter entities
7083 * with "standalone='no'", ... ... The declaration of a parameter entity
7084 * must precede any reference to it...
7085 *
7086 * [ WFC: In DTD ]
7087 * Parameter-entity references may only appear in the DTD.
7088 * NOTE: misleading but this is handled.
7089 *
7090 * Returns the string of the entity content.
7091 * str is updated to the current value of the index
7092 */
7093xmlEntityPtr
7094xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7095 const xmlChar *ptr;
7096 xmlChar cur;
7097 xmlChar *name;
7098 xmlEntityPtr entity = NULL;
7099
7100 if ((str == NULL) || (*str == NULL)) return(NULL);
7101 ptr = *str;
7102 cur = *ptr;
7103 if (cur == '%') {
7104 ptr++;
7105 cur = *ptr;
7106 name = xmlParseStringName(ctxt, &ptr);
7107 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7109 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007110 } else {
7111 cur = *ptr;
7112 if (cur == ';') {
7113 ptr++;
7114 cur = *ptr;
7115 if ((ctxt->sax != NULL) &&
7116 (ctxt->sax->getParameterEntity != NULL))
7117 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7118 name);
7119 if (entity == NULL) {
7120 /*
7121 * [ WFC: Entity Declared ]
7122 * In a document without any DTD, a document with only an
7123 * internal DTD subset which contains no parameter entity
7124 * references, or a document with "standalone='yes'", ...
7125 * ... The declaration of a parameter entity must precede
7126 * any reference to it...
7127 */
7128 if ((ctxt->standalone == 1) ||
7129 ((ctxt->hasExternalSubset == 0) &&
7130 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007131 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007132 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007133 } else {
7134 /*
7135 * [ VC: Entity Declared ]
7136 * In a document with an external subset or external
7137 * parameter entities with "standalone='no'", ...
7138 * ... The declaration of a parameter entity must
7139 * precede any reference to it...
7140 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007141 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7142 "PEReference: %%%s; not found\n",
7143 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007144 ctxt->valid = 0;
7145 }
7146 } else {
7147 /*
7148 * Internal checking in case the entity quest barfed
7149 */
7150 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7151 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007152 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7153 "%%%s; is not a parameter entity\n",
7154 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007155 }
7156 }
7157 ctxt->hasPErefs = 1;
7158 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007159 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007160 }
7161 xmlFree(name);
7162 }
7163 }
7164 *str = ptr;
7165 return(entity);
7166}
7167
7168/**
7169 * xmlParseDocTypeDecl:
7170 * @ctxt: an XML parser context
7171 *
7172 * parse a DOCTYPE declaration
7173 *
7174 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7175 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7176 *
7177 * [ VC: Root Element Type ]
7178 * The Name in the document type declaration must match the element
7179 * type of the root element.
7180 */
7181
7182void
7183xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007184 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007185 xmlChar *ExternalID = NULL;
7186 xmlChar *URI = NULL;
7187
7188 /*
7189 * We know that '<!DOCTYPE' has been detected.
7190 */
7191 SKIP(9);
7192
7193 SKIP_BLANKS;
7194
7195 /*
7196 * Parse the DOCTYPE name.
7197 */
7198 name = xmlParseName(ctxt);
7199 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007200 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7201 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007202 }
7203 ctxt->intSubName = name;
7204
7205 SKIP_BLANKS;
7206
7207 /*
7208 * Check for SystemID and ExternalID
7209 */
7210 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7211
7212 if ((URI != NULL) || (ExternalID != NULL)) {
7213 ctxt->hasExternalSubset = 1;
7214 }
7215 ctxt->extSubURI = URI;
7216 ctxt->extSubSystem = ExternalID;
7217
7218 SKIP_BLANKS;
7219
7220 /*
7221 * Create and update the internal subset.
7222 */
7223 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7224 (!ctxt->disableSAX))
7225 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7226
7227 /*
7228 * Is there any internal subset declarations ?
7229 * they are handled separately in xmlParseInternalSubset()
7230 */
7231 if (RAW == '[')
7232 return;
7233
7234 /*
7235 * We should be at the end of the DOCTYPE declaration.
7236 */
7237 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007238 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007239 }
7240 NEXT;
7241}
7242
7243/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007244 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007245 * @ctxt: an XML parser context
7246 *
7247 * parse the internal subset declaration
7248 *
7249 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7250 */
7251
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007252static void
Owen Taylor3473f882001-02-23 17:55:21 +00007253xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7254 /*
7255 * Is there any DTD definition ?
7256 */
7257 if (RAW == '[') {
7258 ctxt->instate = XML_PARSER_DTD;
7259 NEXT;
7260 /*
7261 * Parse the succession of Markup declarations and
7262 * PEReferences.
7263 * Subsequence (markupdecl | PEReference | S)*
7264 */
7265 while (RAW != ']') {
7266 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007267 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007268
7269 SKIP_BLANKS;
7270 xmlParseMarkupDecl(ctxt);
7271 xmlParsePEReference(ctxt);
7272
7273 /*
7274 * Pop-up of finished entities.
7275 */
7276 while ((RAW == 0) && (ctxt->inputNr > 1))
7277 xmlPopInput(ctxt);
7278
7279 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007280 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007281 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007282 break;
7283 }
7284 }
7285 if (RAW == ']') {
7286 NEXT;
7287 SKIP_BLANKS;
7288 }
7289 }
7290
7291 /*
7292 * We should be at the end of the DOCTYPE declaration.
7293 */
7294 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007295 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007296 }
7297 NEXT;
7298}
7299
Daniel Veillard81273902003-09-30 00:43:48 +00007300#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007301/**
7302 * xmlParseAttribute:
7303 * @ctxt: an XML parser context
7304 * @value: a xmlChar ** used to store the value of the attribute
7305 *
7306 * parse an attribute
7307 *
7308 * [41] Attribute ::= Name Eq AttValue
7309 *
7310 * [ WFC: No External Entity References ]
7311 * Attribute values cannot contain direct or indirect entity references
7312 * to external entities.
7313 *
7314 * [ WFC: No < in Attribute Values ]
7315 * The replacement text of any entity referred to directly or indirectly in
7316 * an attribute value (other than "&lt;") must not contain a <.
7317 *
7318 * [ VC: Attribute Value Type ]
7319 * The attribute must have been declared; the value must be of the type
7320 * declared for it.
7321 *
7322 * [25] Eq ::= S? '=' S?
7323 *
7324 * With namespace:
7325 *
7326 * [NS 11] Attribute ::= QName Eq AttValue
7327 *
7328 * Also the case QName == xmlns:??? is handled independently as a namespace
7329 * definition.
7330 *
7331 * Returns the attribute name, and the value in *value.
7332 */
7333
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007334const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007335xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007336 const xmlChar *name;
7337 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007338
7339 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007340 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007341 name = xmlParseName(ctxt);
7342 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007343 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007344 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007345 return(NULL);
7346 }
7347
7348 /*
7349 * read the value
7350 */
7351 SKIP_BLANKS;
7352 if (RAW == '=') {
7353 NEXT;
7354 SKIP_BLANKS;
7355 val = xmlParseAttValue(ctxt);
7356 ctxt->instate = XML_PARSER_CONTENT;
7357 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007358 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007359 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007360 return(NULL);
7361 }
7362
7363 /*
7364 * Check that xml:lang conforms to the specification
7365 * No more registered as an error, just generate a warning now
7366 * since this was deprecated in XML second edition
7367 */
7368 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7369 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007370 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7371 "Malformed value for xml:lang : %s\n",
7372 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007373 }
7374 }
7375
7376 /*
7377 * Check that xml:space conforms to the specification
7378 */
7379 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7380 if (xmlStrEqual(val, BAD_CAST "default"))
7381 *(ctxt->space) = 0;
7382 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7383 *(ctxt->space) = 1;
7384 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007385 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007386"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007387 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007388 }
7389 }
7390
7391 *value = val;
7392 return(name);
7393}
7394
7395/**
7396 * xmlParseStartTag:
7397 * @ctxt: an XML parser context
7398 *
7399 * parse a start of tag either for rule element or
7400 * EmptyElement. In both case we don't parse the tag closing chars.
7401 *
7402 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7403 *
7404 * [ WFC: Unique Att Spec ]
7405 * No attribute name may appear more than once in the same start-tag or
7406 * empty-element tag.
7407 *
7408 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7409 *
7410 * [ WFC: Unique Att Spec ]
7411 * No attribute name may appear more than once in the same start-tag or
7412 * empty-element tag.
7413 *
7414 * With namespace:
7415 *
7416 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7417 *
7418 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7419 *
7420 * Returns the element name parsed
7421 */
7422
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007423const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007424xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007425 const xmlChar *name;
7426 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007427 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007428 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007429 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007430 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007431 int i;
7432
7433 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007434 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007435
7436 name = xmlParseName(ctxt);
7437 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007438 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007439 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007440 return(NULL);
7441 }
7442
7443 /*
7444 * Now parse the attributes, it ends up with the ending
7445 *
7446 * (S Attribute)* S?
7447 */
7448 SKIP_BLANKS;
7449 GROW;
7450
Daniel Veillard21a0f912001-02-25 19:54:14 +00007451 while ((RAW != '>') &&
7452 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007453 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007454 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007455 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007456
7457 attname = xmlParseAttribute(ctxt, &attvalue);
7458 if ((attname != NULL) && (attvalue != NULL)) {
7459 /*
7460 * [ WFC: Unique Att Spec ]
7461 * No attribute name may appear more than once in the same
7462 * start-tag or empty-element tag.
7463 */
7464 for (i = 0; i < nbatts;i += 2) {
7465 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007466 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007467 xmlFree(attvalue);
7468 goto failed;
7469 }
7470 }
Owen Taylor3473f882001-02-23 17:55:21 +00007471 /*
7472 * Add the pair to atts
7473 */
7474 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007475 maxatts = 22; /* allow for 10 attrs by default */
7476 atts = (const xmlChar **)
7477 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007478 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007479 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007480 if (attvalue != NULL)
7481 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007482 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007483 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007484 ctxt->atts = atts;
7485 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007486 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007487 const xmlChar **n;
7488
Owen Taylor3473f882001-02-23 17:55:21 +00007489 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007490 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007491 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007492 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007493 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007494 if (attvalue != NULL)
7495 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007496 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007497 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007498 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007499 ctxt->atts = atts;
7500 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007501 }
7502 atts[nbatts++] = attname;
7503 atts[nbatts++] = attvalue;
7504 atts[nbatts] = NULL;
7505 atts[nbatts + 1] = NULL;
7506 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007507 if (attvalue != NULL)
7508 xmlFree(attvalue);
7509 }
7510
7511failed:
7512
Daniel Veillard3772de32002-12-17 10:31:45 +00007513 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007514 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7515 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007516 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7518 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007519 }
7520 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007521 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7522 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007523 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7524 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007525 break;
7526 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007527 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007528 GROW;
7529 }
7530
7531 /*
7532 * SAX: Start of Element !
7533 */
7534 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007535 (!ctxt->disableSAX)) {
7536 if (nbatts > 0)
7537 ctxt->sax->startElement(ctxt->userData, name, atts);
7538 else
7539 ctxt->sax->startElement(ctxt->userData, name, NULL);
7540 }
Owen Taylor3473f882001-02-23 17:55:21 +00007541
7542 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007543 /* Free only the content strings */
7544 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007545 if (atts[i] != NULL)
7546 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007547 }
7548 return(name);
7549}
7550
7551/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007552 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007553 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007554 * @line: line of the start tag
7555 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007556 *
7557 * parse an end of tag
7558 *
7559 * [42] ETag ::= '</' Name S? '>'
7560 *
7561 * With namespace
7562 *
7563 * [NS 9] ETag ::= '</' QName S? '>'
7564 */
7565
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007566static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007567xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007568 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007569
7570 GROW;
7571 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007572 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007573 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007574 return;
7575 }
7576 SKIP(2);
7577
Daniel Veillard46de64e2002-05-29 08:21:33 +00007578 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007579
7580 /*
7581 * We should definitely be at the ending "S? '>'" part
7582 */
7583 GROW;
7584 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007585 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007586 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007587 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007588 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007589
7590 /*
7591 * [ WFC: Element Type Match ]
7592 * The Name in an element's end-tag must match the element type in the
7593 * start-tag.
7594 *
7595 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007596 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007597 if (name == NULL) name = BAD_CAST "unparseable";
7598 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007599 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007600 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007601 }
7602
7603 /*
7604 * SAX: End of Tag
7605 */
7606 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7607 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007608 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007609
Daniel Veillarde57ec792003-09-10 10:50:59 +00007610 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007611 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007612 return;
7613}
7614
7615/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007616 * xmlParseEndTag:
7617 * @ctxt: an XML parser context
7618 *
7619 * parse an end of tag
7620 *
7621 * [42] ETag ::= '</' Name S? '>'
7622 *
7623 * With namespace
7624 *
7625 * [NS 9] ETag ::= '</' QName S? '>'
7626 */
7627
7628void
7629xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007630 xmlParseEndTag1(ctxt, 0);
7631}
Daniel Veillard81273902003-09-30 00:43:48 +00007632#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633
7634/************************************************************************
7635 * *
7636 * SAX 2 specific operations *
7637 * *
7638 ************************************************************************/
7639
7640static const xmlChar *
7641xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7642 int len = 0, l;
7643 int c;
7644 int count = 0;
7645
7646 /*
7647 * Handler for more complex cases
7648 */
7649 GROW;
7650 c = CUR_CHAR(l);
7651 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007652 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 return(NULL);
7654 }
7655
7656 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007657 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007658 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007659 (IS_COMBINING(c)) ||
7660 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 if (count++ > 100) {
7662 count = 0;
7663 GROW;
7664 }
7665 len += l;
7666 NEXTL(l);
7667 c = CUR_CHAR(l);
7668 }
7669 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7670}
7671
7672/*
7673 * xmlGetNamespace:
7674 * @ctxt: an XML parser context
7675 * @prefix: the prefix to lookup
7676 *
7677 * Lookup the namespace name for the @prefix (which ca be NULL)
7678 * The prefix must come from the @ctxt->dict dictionnary
7679 *
7680 * Returns the namespace name or NULL if not bound
7681 */
7682static const xmlChar *
7683xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7684 int i;
7685
Daniel Veillarde57ec792003-09-10 10:50:59 +00007686 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007687 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007688 if (ctxt->nsTab[i] == prefix) {
7689 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7690 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007691 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007692 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 return(NULL);
7694}
7695
7696/**
7697 * xmlParseNCName:
7698 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007699 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007700 *
7701 * parse an XML name.
7702 *
7703 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7704 * CombiningChar | Extender
7705 *
7706 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7707 *
7708 * Returns the Name parsed or NULL
7709 */
7710
7711static const xmlChar *
7712xmlParseNCName(xmlParserCtxtPtr ctxt) {
7713 const xmlChar *in;
7714 const xmlChar *ret;
7715 int count = 0;
7716
7717 /*
7718 * Accelerator for simple ASCII names
7719 */
7720 in = ctxt->input->cur;
7721 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7722 ((*in >= 0x41) && (*in <= 0x5A)) ||
7723 (*in == '_')) {
7724 in++;
7725 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7726 ((*in >= 0x41) && (*in <= 0x5A)) ||
7727 ((*in >= 0x30) && (*in <= 0x39)) ||
7728 (*in == '_') || (*in == '-') ||
7729 (*in == '.'))
7730 in++;
7731 if ((*in > 0) && (*in < 0x80)) {
7732 count = in - ctxt->input->cur;
7733 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7734 ctxt->input->cur = in;
7735 ctxt->nbChars += count;
7736 ctxt->input->col += count;
7737 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007738 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 }
7740 return(ret);
7741 }
7742 }
7743 return(xmlParseNCNameComplex(ctxt));
7744}
7745
7746/**
7747 * xmlParseQName:
7748 * @ctxt: an XML parser context
7749 * @prefix: pointer to store the prefix part
7750 *
7751 * parse an XML Namespace QName
7752 *
7753 * [6] QName ::= (Prefix ':')? LocalPart
7754 * [7] Prefix ::= NCName
7755 * [8] LocalPart ::= NCName
7756 *
7757 * Returns the Name parsed or NULL
7758 */
7759
7760static const xmlChar *
7761xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7762 const xmlChar *l, *p;
7763
7764 GROW;
7765
7766 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007767 if (l == NULL) {
7768 if (CUR == ':') {
7769 l = xmlParseName(ctxt);
7770 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007771 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7772 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007773 *prefix = NULL;
7774 return(l);
7775 }
7776 }
7777 return(NULL);
7778 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007779 if (CUR == ':') {
7780 NEXT;
7781 p = l;
7782 l = xmlParseNCName(ctxt);
7783 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007784 xmlChar *tmp;
7785
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007786 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7787 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007788 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7789 p = xmlDictLookup(ctxt->dict, tmp, -1);
7790 if (tmp != NULL) xmlFree(tmp);
7791 *prefix = NULL;
7792 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 }
7794 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007795 xmlChar *tmp;
7796
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007797 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7798 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007799 NEXT;
7800 tmp = (xmlChar *) xmlParseName(ctxt);
7801 if (tmp != NULL) {
7802 tmp = xmlBuildQName(tmp, l, NULL, 0);
7803 l = xmlDictLookup(ctxt->dict, tmp, -1);
7804 if (tmp != NULL) xmlFree(tmp);
7805 *prefix = p;
7806 return(l);
7807 }
7808 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7809 l = xmlDictLookup(ctxt->dict, tmp, -1);
7810 if (tmp != NULL) xmlFree(tmp);
7811 *prefix = p;
7812 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007813 }
7814 *prefix = p;
7815 } else
7816 *prefix = NULL;
7817 return(l);
7818}
7819
7820/**
7821 * xmlParseQNameAndCompare:
7822 * @ctxt: an XML parser context
7823 * @name: the localname
7824 * @prefix: the prefix, if any.
7825 *
7826 * parse an XML name and compares for match
7827 * (specialized for endtag parsing)
7828 *
7829 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7830 * and the name for mismatch
7831 */
7832
7833static const xmlChar *
7834xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7835 xmlChar const *prefix) {
7836 const xmlChar *cmp = name;
7837 const xmlChar *in;
7838 const xmlChar *ret;
7839 const xmlChar *prefix2;
7840
7841 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7842
7843 GROW;
7844 in = ctxt->input->cur;
7845
7846 cmp = prefix;
7847 while (*in != 0 && *in == *cmp) {
7848 ++in;
7849 ++cmp;
7850 }
7851 if ((*cmp == 0) && (*in == ':')) {
7852 in++;
7853 cmp = name;
7854 while (*in != 0 && *in == *cmp) {
7855 ++in;
7856 ++cmp;
7857 }
William M. Brack76e95df2003-10-18 16:20:14 +00007858 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 /* success */
7860 ctxt->input->cur = in;
7861 return((const xmlChar*) 1);
7862 }
7863 }
7864 /*
7865 * all strings coms from the dictionary, equality can be done directly
7866 */
7867 ret = xmlParseQName (ctxt, &prefix2);
7868 if ((ret == name) && (prefix == prefix2))
7869 return((const xmlChar*) 1);
7870 return ret;
7871}
7872
7873/**
7874 * xmlParseAttValueInternal:
7875 * @ctxt: an XML parser context
7876 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007877 * @alloc: whether the attribute was reallocated as a new string
7878 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007879 *
7880 * parse a value for an attribute.
7881 * NOTE: if no normalization is needed, the routine will return pointers
7882 * directly from the data buffer.
7883 *
7884 * 3.3.3 Attribute-Value Normalization:
7885 * Before the value of an attribute is passed to the application or
7886 * checked for validity, the XML processor must normalize it as follows:
7887 * - a character reference is processed by appending the referenced
7888 * character to the attribute value
7889 * - an entity reference is processed by recursively processing the
7890 * replacement text of the entity
7891 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7892 * appending #x20 to the normalized value, except that only a single
7893 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7894 * parsed entity or the literal entity value of an internal parsed entity
7895 * - other characters are processed by appending them to the normalized value
7896 * If the declared value is not CDATA, then the XML processor must further
7897 * process the normalized attribute value by discarding any leading and
7898 * trailing space (#x20) characters, and by replacing sequences of space
7899 * (#x20) characters by a single space (#x20) character.
7900 * All attributes for which no declaration has been read should be treated
7901 * by a non-validating parser as if declared CDATA.
7902 *
7903 * Returns the AttValue parsed or NULL. The value has to be freed by the
7904 * caller if it was copied, this can be detected by val[*len] == 0.
7905 */
7906
7907static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007908xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7909 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007910{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007911 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007912 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913 xmlChar *ret = NULL;
7914
7915 GROW;
7916 in = (xmlChar *) CUR_PTR;
7917 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007918 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007919 return (NULL);
7920 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007921 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007922
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007923 /*
7924 * try to handle in this routine the most common case where no
7925 * allocation of a new string is required and where content is
7926 * pure ASCII.
7927 */
7928 limit = *in++;
7929 end = ctxt->input->end;
7930 start = in;
7931 if (in >= end) {
7932 const xmlChar *oldbase = ctxt->input->base;
7933 GROW;
7934 if (oldbase != ctxt->input->base) {
7935 long delta = ctxt->input->base - oldbase;
7936 start = start + delta;
7937 in = in + delta;
7938 }
7939 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007941 if (normalize) {
7942 /*
7943 * Skip any leading spaces
7944 */
7945 while ((in < end) && (*in != limit) &&
7946 ((*in == 0x20) || (*in == 0x9) ||
7947 (*in == 0xA) || (*in == 0xD))) {
7948 in++;
7949 start = in;
7950 if (in >= end) {
7951 const xmlChar *oldbase = ctxt->input->base;
7952 GROW;
7953 if (oldbase != ctxt->input->base) {
7954 long delta = ctxt->input->base - oldbase;
7955 start = start + delta;
7956 in = in + delta;
7957 }
7958 end = ctxt->input->end;
7959 }
7960 }
7961 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7962 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7963 if ((*in++ == 0x20) && (*in == 0x20)) break;
7964 if (in >= end) {
7965 const xmlChar *oldbase = ctxt->input->base;
7966 GROW;
7967 if (oldbase != ctxt->input->base) {
7968 long delta = ctxt->input->base - oldbase;
7969 start = start + delta;
7970 in = in + delta;
7971 }
7972 end = ctxt->input->end;
7973 }
7974 }
7975 last = in;
7976 /*
7977 * skip the trailing blanks
7978 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007979 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007980 while ((in < end) && (*in != limit) &&
7981 ((*in == 0x20) || (*in == 0x9) ||
7982 (*in == 0xA) || (*in == 0xD))) {
7983 in++;
7984 if (in >= end) {
7985 const xmlChar *oldbase = ctxt->input->base;
7986 GROW;
7987 if (oldbase != ctxt->input->base) {
7988 long delta = ctxt->input->base - oldbase;
7989 start = start + delta;
7990 in = in + delta;
7991 last = last + delta;
7992 }
7993 end = ctxt->input->end;
7994 }
7995 }
7996 if (*in != limit) goto need_complex;
7997 } else {
7998 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7999 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8000 in++;
8001 if (in >= end) {
8002 const xmlChar *oldbase = ctxt->input->base;
8003 GROW;
8004 if (oldbase != ctxt->input->base) {
8005 long delta = ctxt->input->base - oldbase;
8006 start = start + delta;
8007 in = in + delta;
8008 }
8009 end = ctxt->input->end;
8010 }
8011 }
8012 last = in;
8013 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008014 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008015 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008017 *len = last - start;
8018 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008019 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008020 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008021 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008022 }
8023 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008024 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008025 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008026need_complex:
8027 if (alloc) *alloc = 1;
8028 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008029}
8030
8031/**
8032 * xmlParseAttribute2:
8033 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008034 * @pref: the element prefix
8035 * @elem: the element name
8036 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008037 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008038 * @len: an int * to save the length of the attribute
8039 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008040 *
8041 * parse an attribute in the new SAX2 framework.
8042 *
8043 * Returns the attribute name, and the value in *value, .
8044 */
8045
8046static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008047xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008048 const xmlChar * pref, const xmlChar * elem,
8049 const xmlChar ** prefix, xmlChar ** value,
8050 int *len, int *alloc)
8051{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008052 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008053 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008054 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008055
8056 *value = NULL;
8057 GROW;
8058 name = xmlParseQName(ctxt, prefix);
8059 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008060 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8061 "error parsing attribute name\n");
8062 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008063 }
8064
8065 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008066 * get the type if needed
8067 */
8068 if (ctxt->attsSpecial != NULL) {
8069 int type;
8070
8071 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008072 pref, elem, *prefix, name);
8073 if (type != 0)
8074 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008075 }
8076
8077 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078 * read the value
8079 */
8080 SKIP_BLANKS;
8081 if (RAW == '=') {
8082 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008083 SKIP_BLANKS;
8084 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8085 if (normalize) {
8086 /*
8087 * Sometimes a second normalisation pass for spaces is needed
8088 * but that only happens if charrefs or entities refernces
8089 * have been used in the attribute value, i.e. the attribute
8090 * value have been extracted in an allocated string already.
8091 */
8092 if (*alloc) {
8093 const xmlChar *val2;
8094
8095 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8096 if (val2 != NULL) {
8097 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008098 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008099 }
8100 }
8101 }
8102 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008103 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008104 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8105 "Specification mandate value for attribute %s\n",
8106 name);
8107 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008108 }
8109
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008110 if (*prefix == ctxt->str_xml) {
8111 /*
8112 * Check that xml:lang conforms to the specification
8113 * No more registered as an error, just generate a warning now
8114 * since this was deprecated in XML second edition
8115 */
8116 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8117 internal_val = xmlStrndup(val, *len);
8118 if (!xmlCheckLanguageID(internal_val)) {
8119 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8120 "Malformed value for xml:lang : %s\n",
8121 internal_val, NULL);
8122 }
8123 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008124
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008125 /*
8126 * Check that xml:space conforms to the specification
8127 */
8128 if (xmlStrEqual(name, BAD_CAST "space")) {
8129 internal_val = xmlStrndup(val, *len);
8130 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8131 *(ctxt->space) = 0;
8132 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8133 *(ctxt->space) = 1;
8134 else {
8135 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8136 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8137 internal_val, NULL);
8138 }
8139 }
8140 if (internal_val) {
8141 xmlFree(internal_val);
8142 }
8143 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008144
8145 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008146 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008147}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008148/**
8149 * xmlParseStartTag2:
8150 * @ctxt: an XML parser context
8151 *
8152 * parse a start of tag either for rule element or
8153 * EmptyElement. In both case we don't parse the tag closing chars.
8154 * This routine is called when running SAX2 parsing
8155 *
8156 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8157 *
8158 * [ WFC: Unique Att Spec ]
8159 * No attribute name may appear more than once in the same start-tag or
8160 * empty-element tag.
8161 *
8162 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8163 *
8164 * [ WFC: Unique Att Spec ]
8165 * No attribute name may appear more than once in the same start-tag or
8166 * empty-element tag.
8167 *
8168 * With namespace:
8169 *
8170 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8171 *
8172 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8173 *
8174 * Returns the element name parsed
8175 */
8176
8177static const xmlChar *
8178xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008179 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008180 const xmlChar *localname;
8181 const xmlChar *prefix;
8182 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008183 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008184 const xmlChar *nsname;
8185 xmlChar *attvalue;
8186 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008188 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008189 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008190 const xmlChar *base;
8191 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008192 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008193
8194 if (RAW != '<') return(NULL);
8195 NEXT1;
8196
8197 /*
8198 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8199 * point since the attribute values may be stored as pointers to
8200 * the buffer and calling SHRINK would destroy them !
8201 * The Shrinking is only possible once the full set of attribute
8202 * callbacks have been done.
8203 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008204reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008206 base = ctxt->input->base;
8207 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008208 oldline = ctxt->input->line;
8209 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008210 nbatts = 0;
8211 nratts = 0;
8212 nbdef = 0;
8213 nbNs = 0;
8214 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008215 /* Forget any namespaces added during an earlier parse of this element. */
8216 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008217
8218 localname = xmlParseQName(ctxt, &prefix);
8219 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8221 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008222 return(NULL);
8223 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008224 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008225
8226 /*
8227 * Now parse the attributes, it ends up with the ending
8228 *
8229 * (S Attribute)* S?
8230 */
8231 SKIP_BLANKS;
8232 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008233 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234
8235 while ((RAW != '>') &&
8236 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008237 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008238 const xmlChar *q = CUR_PTR;
8239 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008240 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008241
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008242 attname = xmlParseAttribute2(ctxt, prefix, localname,
8243 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008244 if (ctxt->input->base != base) {
8245 if ((attvalue != NULL) && (alloc != 0))
8246 xmlFree(attvalue);
8247 attvalue = NULL;
8248 goto base_changed;
8249 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008250 if ((attname != NULL) && (attvalue != NULL)) {
8251 if (len < 0) len = xmlStrlen(attvalue);
8252 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008253 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8254 xmlURIPtr uri;
8255
8256 if (*URL != 0) {
8257 uri = xmlParseURI((const char *) URL);
8258 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008259 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8260 "xmlns: %s not a valid URI\n",
8261 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008262 } else {
8263 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008264 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8265 "xmlns: URI %s is not absolute\n",
8266 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008267 }
8268 xmlFreeURI(uri);
8269 }
8270 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008271 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008272 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008273 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008274 for (j = 1;j <= nbNs;j++)
8275 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8276 break;
8277 if (j <= nbNs)
8278 xmlErrAttributeDup(ctxt, NULL, attname);
8279 else
8280 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008281 if (alloc != 0) xmlFree(attvalue);
8282 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 continue;
8284 }
8285 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008286 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8287 xmlURIPtr uri;
8288
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008289 if (attname == ctxt->str_xml) {
8290 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008291 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8292 "xml namespace prefix mapped to wrong URI\n",
8293 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008294 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008295 /*
8296 * Do not keep a namespace definition node
8297 */
8298 if (alloc != 0) xmlFree(attvalue);
8299 SKIP_BLANKS;
8300 continue;
8301 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008302 uri = xmlParseURI((const char *) URL);
8303 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008304 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8305 "xmlns:%s: '%s' is not a valid URI\n",
8306 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008307 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008308 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008309 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8310 "xmlns:%s: URI %s is not absolute\n",
8311 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008312 }
8313 xmlFreeURI(uri);
8314 }
8315
Daniel Veillard0fb18932003-09-07 09:14:37 +00008316 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008317 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008318 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008319 for (j = 1;j <= nbNs;j++)
8320 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8321 break;
8322 if (j <= nbNs)
8323 xmlErrAttributeDup(ctxt, aprefix, attname);
8324 else
8325 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008326 if (alloc != 0) xmlFree(attvalue);
8327 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008328 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008329 continue;
8330 }
8331
8332 /*
8333 * Add the pair to atts
8334 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008335 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8336 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008337 if (attvalue[len] == 0)
8338 xmlFree(attvalue);
8339 goto failed;
8340 }
8341 maxatts = ctxt->maxatts;
8342 atts = ctxt->atts;
8343 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008344 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008345 atts[nbatts++] = attname;
8346 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008347 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008348 atts[nbatts++] = attvalue;
8349 attvalue += len;
8350 atts[nbatts++] = attvalue;
8351 /*
8352 * tag if some deallocation is needed
8353 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008354 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008355 } else {
8356 if ((attvalue != NULL) && (attvalue[len] == 0))
8357 xmlFree(attvalue);
8358 }
8359
8360failed:
8361
8362 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008363 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008364 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8365 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008366 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8368 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008369 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008370 }
8371 SKIP_BLANKS;
8372 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8373 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008374 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008375 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008376 break;
8377 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008378 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008379 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008380 }
8381
Daniel Veillard0fb18932003-09-07 09:14:37 +00008382 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008383 * The attributes defaulting
8384 */
8385 if (ctxt->attsDefault != NULL) {
8386 xmlDefAttrsPtr defaults;
8387
8388 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8389 if (defaults != NULL) {
8390 for (i = 0;i < defaults->nbAttrs;i++) {
8391 attname = defaults->values[4 * i];
8392 aprefix = defaults->values[4 * i + 1];
8393
8394 /*
8395 * special work for namespaces defaulted defs
8396 */
8397 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8398 /*
8399 * check that it's not a defined namespace
8400 */
8401 for (j = 1;j <= nbNs;j++)
8402 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8403 break;
8404 if (j <= nbNs) continue;
8405
8406 nsname = xmlGetNamespace(ctxt, NULL);
8407 if (nsname != defaults->values[4 * i + 2]) {
8408 if (nsPush(ctxt, NULL,
8409 defaults->values[4 * i + 2]) > 0)
8410 nbNs++;
8411 }
8412 } else if (aprefix == ctxt->str_xmlns) {
8413 /*
8414 * check that it's not a defined namespace
8415 */
8416 for (j = 1;j <= nbNs;j++)
8417 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8418 break;
8419 if (j <= nbNs) continue;
8420
8421 nsname = xmlGetNamespace(ctxt, attname);
8422 if (nsname != defaults->values[2]) {
8423 if (nsPush(ctxt, attname,
8424 defaults->values[4 * i + 2]) > 0)
8425 nbNs++;
8426 }
8427 } else {
8428 /*
8429 * check that it's not a defined attribute
8430 */
8431 for (j = 0;j < nbatts;j+=5) {
8432 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8433 break;
8434 }
8435 if (j < nbatts) continue;
8436
8437 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8438 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008439 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008440 }
8441 maxatts = ctxt->maxatts;
8442 atts = ctxt->atts;
8443 }
8444 atts[nbatts++] = attname;
8445 atts[nbatts++] = aprefix;
8446 if (aprefix == NULL)
8447 atts[nbatts++] = NULL;
8448 else
8449 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8450 atts[nbatts++] = defaults->values[4 * i + 2];
8451 atts[nbatts++] = defaults->values[4 * i + 3];
8452 nbdef++;
8453 }
8454 }
8455 }
8456 }
8457
Daniel Veillarde70c8772003-11-25 07:21:18 +00008458 /*
8459 * The attributes checkings
8460 */
8461 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008462 /*
8463 * The default namespace does not apply to attribute names.
8464 */
8465 if (atts[i + 1] != NULL) {
8466 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8467 if (nsname == NULL) {
8468 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8469 "Namespace prefix %s for %s on %s is not defined\n",
8470 atts[i + 1], atts[i], localname);
8471 }
8472 atts[i + 2] = nsname;
8473 } else
8474 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008475 /*
8476 * [ WFC: Unique Att Spec ]
8477 * No attribute name may appear more than once in the same
8478 * start-tag or empty-element tag.
8479 * As extended by the Namespace in XML REC.
8480 */
8481 for (j = 0; j < i;j += 5) {
8482 if (atts[i] == atts[j]) {
8483 if (atts[i+1] == atts[j+1]) {
8484 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8485 break;
8486 }
8487 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8488 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8489 "Namespaced Attribute %s in '%s' redefined\n",
8490 atts[i], nsname, NULL);
8491 break;
8492 }
8493 }
8494 }
8495 }
8496
Daniel Veillarde57ec792003-09-10 10:50:59 +00008497 nsname = xmlGetNamespace(ctxt, prefix);
8498 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008499 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8500 "Namespace prefix %s on %s is not defined\n",
8501 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008502 }
8503 *pref = prefix;
8504 *URI = nsname;
8505
8506 /*
8507 * SAX: Start of Element !
8508 */
8509 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8510 (!ctxt->disableSAX)) {
8511 if (nbNs > 0)
8512 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8513 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8514 nbatts / 5, nbdef, atts);
8515 else
8516 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8517 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8518 }
8519
8520 /*
8521 * Free up attribute allocated strings if needed
8522 */
8523 if (attval != 0) {
8524 for (i = 3,j = 0; j < nratts;i += 5,j++)
8525 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8526 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008527 }
8528
8529 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008530
8531base_changed:
8532 /*
8533 * the attribute strings are valid iif the base didn't changed
8534 */
8535 if (attval != 0) {
8536 for (i = 3,j = 0; j < nratts;i += 5,j++)
8537 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8538 xmlFree((xmlChar *) atts[i]);
8539 }
8540 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008541 ctxt->input->line = oldline;
8542 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008543 if (ctxt->wellFormed == 1) {
8544 goto reparse;
8545 }
8546 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008547}
8548
8549/**
8550 * xmlParseEndTag2:
8551 * @ctxt: an XML parser context
8552 * @line: line of the start tag
8553 * @nsNr: number of namespaces on the start tag
8554 *
8555 * parse an end of tag
8556 *
8557 * [42] ETag ::= '</' Name S? '>'
8558 *
8559 * With namespace
8560 *
8561 * [NS 9] ETag ::= '</' QName S? '>'
8562 */
8563
8564static void
8565xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008566 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 const xmlChar *name;
8568
8569 GROW;
8570 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008571 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008572 return;
8573 }
8574 SKIP(2);
8575
William M. Brack13dfa872004-09-18 04:52:08 +00008576 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008577 if (ctxt->input->cur[tlen] == '>') {
8578 ctxt->input->cur += tlen + 1;
8579 goto done;
8580 }
8581 ctxt->input->cur += tlen;
8582 name = (xmlChar*)1;
8583 } else {
8584 if (prefix == NULL)
8585 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8586 else
8587 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8588 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008589
8590 /*
8591 * We should definitely be at the ending "S? '>'" part
8592 */
8593 GROW;
8594 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008595 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008596 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008597 } else
8598 NEXT1;
8599
8600 /*
8601 * [ WFC: Element Type Match ]
8602 * The Name in an element's end-tag must match the element type in the
8603 * start-tag.
8604 *
8605 */
8606 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008607 if (name == NULL) name = BAD_CAST "unparseable";
8608 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008609 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008610 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008611 }
8612
8613 /*
8614 * SAX: End of Tag
8615 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008616done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008617 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8618 (!ctxt->disableSAX))
8619 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8620
Daniel Veillard0fb18932003-09-07 09:14:37 +00008621 spacePop(ctxt);
8622 if (nsNr != 0)
8623 nsPop(ctxt, nsNr);
8624 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008625}
8626
8627/**
Owen Taylor3473f882001-02-23 17:55:21 +00008628 * xmlParseCDSect:
8629 * @ctxt: an XML parser context
8630 *
8631 * Parse escaped pure raw content.
8632 *
8633 * [18] CDSect ::= CDStart CData CDEnd
8634 *
8635 * [19] CDStart ::= '<![CDATA['
8636 *
8637 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8638 *
8639 * [21] CDEnd ::= ']]>'
8640 */
8641void
8642xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8643 xmlChar *buf = NULL;
8644 int len = 0;
8645 int size = XML_PARSER_BUFFER_SIZE;
8646 int r, rl;
8647 int s, sl;
8648 int cur, l;
8649 int count = 0;
8650
Daniel Veillard8f597c32003-10-06 08:19:27 +00008651 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008652 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008653 SKIP(9);
8654 } else
8655 return;
8656
8657 ctxt->instate = XML_PARSER_CDATA_SECTION;
8658 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008659 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008660 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008661 ctxt->instate = XML_PARSER_CONTENT;
8662 return;
8663 }
8664 NEXTL(rl);
8665 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008666 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008667 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008668 ctxt->instate = XML_PARSER_CONTENT;
8669 return;
8670 }
8671 NEXTL(sl);
8672 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008673 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008674 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008675 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008676 return;
8677 }
William M. Brack871611b2003-10-18 04:53:14 +00008678 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008679 ((r != ']') || (s != ']') || (cur != '>'))) {
8680 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008681 xmlChar *tmp;
8682
Owen Taylor3473f882001-02-23 17:55:21 +00008683 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008684 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8685 if (tmp == NULL) {
8686 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008687 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008688 return;
8689 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008690 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008691 }
8692 COPY_BUF(rl,buf,len,r);
8693 r = s;
8694 rl = sl;
8695 s = cur;
8696 sl = l;
8697 count++;
8698 if (count > 50) {
8699 GROW;
8700 count = 0;
8701 }
8702 NEXTL(l);
8703 cur = CUR_CHAR(l);
8704 }
8705 buf[len] = 0;
8706 ctxt->instate = XML_PARSER_CONTENT;
8707 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008708 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008709 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008710 xmlFree(buf);
8711 return;
8712 }
8713 NEXTL(l);
8714
8715 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008716 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008717 */
8718 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8719 if (ctxt->sax->cdataBlock != NULL)
8720 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008721 else if (ctxt->sax->characters != NULL)
8722 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008723 }
8724 xmlFree(buf);
8725}
8726
8727/**
8728 * xmlParseContent:
8729 * @ctxt: an XML parser context
8730 *
8731 * Parse a content:
8732 *
8733 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8734 */
8735
8736void
8737xmlParseContent(xmlParserCtxtPtr ctxt) {
8738 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008739 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008740 ((RAW != '<') || (NXT(1) != '/')) &&
8741 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008742 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008743 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008744 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008745
8746 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008747 * First case : a Processing Instruction.
8748 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008749 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008750 xmlParsePI(ctxt);
8751 }
8752
8753 /*
8754 * Second case : a CDSection
8755 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008756 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008757 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008758 xmlParseCDSect(ctxt);
8759 }
8760
8761 /*
8762 * Third case : a comment
8763 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008764 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008765 (NXT(2) == '-') && (NXT(3) == '-')) {
8766 xmlParseComment(ctxt);
8767 ctxt->instate = XML_PARSER_CONTENT;
8768 }
8769
8770 /*
8771 * Fourth case : a sub-element.
8772 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008773 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008774 xmlParseElement(ctxt);
8775 }
8776
8777 /*
8778 * Fifth case : a reference. If if has not been resolved,
8779 * parsing returns it's Name, create the node
8780 */
8781
Daniel Veillard21a0f912001-02-25 19:54:14 +00008782 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008783 xmlParseReference(ctxt);
8784 }
8785
8786 /*
8787 * Last case, text. Note that References are handled directly.
8788 */
8789 else {
8790 xmlParseCharData(ctxt, 0);
8791 }
8792
8793 GROW;
8794 /*
8795 * Pop-up of finished entities.
8796 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008797 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008798 xmlPopInput(ctxt);
8799 SHRINK;
8800
Daniel Veillardfdc91562002-07-01 21:52:03 +00008801 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008802 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8803 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008804 ctxt->instate = XML_PARSER_EOF;
8805 break;
8806 }
8807 }
8808}
8809
8810/**
8811 * xmlParseElement:
8812 * @ctxt: an XML parser context
8813 *
8814 * parse an XML element, this is highly recursive
8815 *
8816 * [39] element ::= EmptyElemTag | STag content ETag
8817 *
8818 * [ WFC: Element Type Match ]
8819 * The Name in an element's end-tag must match the element type in the
8820 * start-tag.
8821 *
Owen Taylor3473f882001-02-23 17:55:21 +00008822 */
8823
8824void
8825xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008826 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 const xmlChar *prefix;
8828 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008829 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008830 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008831 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008833
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008834 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8835 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8836 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8837 xmlParserMaxDepth);
8838 ctxt->instate = XML_PARSER_EOF;
8839 return;
8840 }
8841
Owen Taylor3473f882001-02-23 17:55:21 +00008842 /* Capture start position */
8843 if (ctxt->record_info) {
8844 node_info.begin_pos = ctxt->input->consumed +
8845 (CUR_PTR - ctxt->input->base);
8846 node_info.begin_line = ctxt->input->line;
8847 }
8848
8849 if (ctxt->spaceNr == 0)
8850 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008851 else if (*ctxt->space == -2)
8852 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008853 else
8854 spacePush(ctxt, *ctxt->space);
8855
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008856 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008857#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008859#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008860 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008861#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008862 else
8863 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008864#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008865 if (name == NULL) {
8866 spacePop(ctxt);
8867 return;
8868 }
8869 namePush(ctxt, name);
8870 ret = ctxt->node;
8871
Daniel Veillard4432df22003-09-28 18:58:27 +00008872#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008873 /*
8874 * [ VC: Root Element Type ]
8875 * The Name in the document type declaration must match the element
8876 * type of the root element.
8877 */
8878 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8879 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8880 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008881#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008882
8883 /*
8884 * Check for an Empty Element.
8885 */
8886 if ((RAW == '/') && (NXT(1) == '>')) {
8887 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008888 if (ctxt->sax2) {
8889 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8890 (!ctxt->disableSAX))
8891 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008892#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893 } else {
8894 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8895 (!ctxt->disableSAX))
8896 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008897#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008898 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 namePop(ctxt);
8900 spacePop(ctxt);
8901 if (nsNr != ctxt->nsNr)
8902 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008903 if ( ret != NULL && ctxt->record_info ) {
8904 node_info.end_pos = ctxt->input->consumed +
8905 (CUR_PTR - ctxt->input->base);
8906 node_info.end_line = ctxt->input->line;
8907 node_info.node = ret;
8908 xmlParserAddNodeInfo(ctxt, &node_info);
8909 }
8910 return;
8911 }
8912 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008913 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008914 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008915 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8916 "Couldn't find end of Start Tag %s line %d\n",
8917 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008918
8919 /*
8920 * end of parsing of this node.
8921 */
8922 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008923 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008924 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008925 if (nsNr != ctxt->nsNr)
8926 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008927
8928 /*
8929 * Capture end position and add node
8930 */
8931 if ( ret != NULL && ctxt->record_info ) {
8932 node_info.end_pos = ctxt->input->consumed +
8933 (CUR_PTR - ctxt->input->base);
8934 node_info.end_line = ctxt->input->line;
8935 node_info.node = ret;
8936 xmlParserAddNodeInfo(ctxt, &node_info);
8937 }
8938 return;
8939 }
8940
8941 /*
8942 * Parse the content of the element:
8943 */
8944 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008945 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008946 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008947 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008948 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008949
8950 /*
8951 * end of parsing of this node.
8952 */
8953 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008954 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008955 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008956 if (nsNr != ctxt->nsNr)
8957 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008958 return;
8959 }
8960
8961 /*
8962 * parse the end of tag: '</' should be here.
8963 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008964 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008965 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008966 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008967 }
8968#ifdef LIBXML_SAX1_ENABLED
8969 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008970 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008971#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008972
8973 /*
8974 * Capture end position and add node
8975 */
8976 if ( ret != NULL && ctxt->record_info ) {
8977 node_info.end_pos = ctxt->input->consumed +
8978 (CUR_PTR - ctxt->input->base);
8979 node_info.end_line = ctxt->input->line;
8980 node_info.node = ret;
8981 xmlParserAddNodeInfo(ctxt, &node_info);
8982 }
8983}
8984
8985/**
8986 * xmlParseVersionNum:
8987 * @ctxt: an XML parser context
8988 *
8989 * parse the XML version value.
8990 *
8991 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8992 *
8993 * Returns the string giving the XML version number, or NULL
8994 */
8995xmlChar *
8996xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8997 xmlChar *buf = NULL;
8998 int len = 0;
8999 int size = 10;
9000 xmlChar cur;
9001
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009002 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009003 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009004 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009005 return(NULL);
9006 }
9007 cur = CUR;
9008 while (((cur >= 'a') && (cur <= 'z')) ||
9009 ((cur >= 'A') && (cur <= 'Z')) ||
9010 ((cur >= '0') && (cur <= '9')) ||
9011 (cur == '_') || (cur == '.') ||
9012 (cur == ':') || (cur == '-')) {
9013 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009014 xmlChar *tmp;
9015
Owen Taylor3473f882001-02-23 17:55:21 +00009016 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009017 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9018 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009019 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009020 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009021 return(NULL);
9022 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009023 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009024 }
9025 buf[len++] = cur;
9026 NEXT;
9027 cur=CUR;
9028 }
9029 buf[len] = 0;
9030 return(buf);
9031}
9032
9033/**
9034 * xmlParseVersionInfo:
9035 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009036 *
Owen Taylor3473f882001-02-23 17:55:21 +00009037 * parse the XML version.
9038 *
9039 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009040 *
Owen Taylor3473f882001-02-23 17:55:21 +00009041 * [25] Eq ::= S? '=' S?
9042 *
9043 * Returns the version string, e.g. "1.0"
9044 */
9045
9046xmlChar *
9047xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9048 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009049
Daniel Veillarda07050d2003-10-19 14:46:32 +00009050 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009051 SKIP(7);
9052 SKIP_BLANKS;
9053 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009054 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009055 return(NULL);
9056 }
9057 NEXT;
9058 SKIP_BLANKS;
9059 if (RAW == '"') {
9060 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009061 version = xmlParseVersionNum(ctxt);
9062 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009063 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009064 } else
9065 NEXT;
9066 } else if (RAW == '\''){
9067 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009068 version = xmlParseVersionNum(ctxt);
9069 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009070 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009071 } else
9072 NEXT;
9073 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009074 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009075 }
9076 }
9077 return(version);
9078}
9079
9080/**
9081 * xmlParseEncName:
9082 * @ctxt: an XML parser context
9083 *
9084 * parse the XML encoding name
9085 *
9086 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9087 *
9088 * Returns the encoding name value or NULL
9089 */
9090xmlChar *
9091xmlParseEncName(xmlParserCtxtPtr ctxt) {
9092 xmlChar *buf = NULL;
9093 int len = 0;
9094 int size = 10;
9095 xmlChar cur;
9096
9097 cur = CUR;
9098 if (((cur >= 'a') && (cur <= 'z')) ||
9099 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009100 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009101 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009102 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009103 return(NULL);
9104 }
9105
9106 buf[len++] = cur;
9107 NEXT;
9108 cur = CUR;
9109 while (((cur >= 'a') && (cur <= 'z')) ||
9110 ((cur >= 'A') && (cur <= 'Z')) ||
9111 ((cur >= '0') && (cur <= '9')) ||
9112 (cur == '.') || (cur == '_') ||
9113 (cur == '-')) {
9114 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009115 xmlChar *tmp;
9116
Owen Taylor3473f882001-02-23 17:55:21 +00009117 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009118 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9119 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009120 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009121 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009122 return(NULL);
9123 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009124 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009125 }
9126 buf[len++] = cur;
9127 NEXT;
9128 cur = CUR;
9129 if (cur == 0) {
9130 SHRINK;
9131 GROW;
9132 cur = CUR;
9133 }
9134 }
9135 buf[len] = 0;
9136 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009137 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009138 }
9139 return(buf);
9140}
9141
9142/**
9143 * xmlParseEncodingDecl:
9144 * @ctxt: an XML parser context
9145 *
9146 * parse the XML encoding declaration
9147 *
9148 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9149 *
9150 * this setups the conversion filters.
9151 *
9152 * Returns the encoding value or NULL
9153 */
9154
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009155const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009156xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9157 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009158
9159 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009160 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009161 SKIP(8);
9162 SKIP_BLANKS;
9163 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009164 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009165 return(NULL);
9166 }
9167 NEXT;
9168 SKIP_BLANKS;
9169 if (RAW == '"') {
9170 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009171 encoding = xmlParseEncName(ctxt);
9172 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009173 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009174 } else
9175 NEXT;
9176 } else if (RAW == '\''){
9177 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009178 encoding = xmlParseEncName(ctxt);
9179 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009180 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009181 } else
9182 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009183 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009184 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009185 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009186 /*
9187 * UTF-16 encoding stwich has already taken place at this stage,
9188 * more over the little-endian/big-endian selection is already done
9189 */
9190 if ((encoding != NULL) &&
9191 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9192 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009193 if (ctxt->encoding != NULL)
9194 xmlFree((xmlChar *) ctxt->encoding);
9195 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009196 }
9197 /*
9198 * UTF-8 encoding is handled natively
9199 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009200 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009201 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9202 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009203 if (ctxt->encoding != NULL)
9204 xmlFree((xmlChar *) ctxt->encoding);
9205 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009206 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009207 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009208 xmlCharEncodingHandlerPtr handler;
9209
9210 if (ctxt->input->encoding != NULL)
9211 xmlFree((xmlChar *) ctxt->input->encoding);
9212 ctxt->input->encoding = encoding;
9213
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009214 handler = xmlFindCharEncodingHandler((const char *) encoding);
9215 if (handler != NULL) {
9216 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009217 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009218 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009219 "Unsupported encoding %s\n", encoding);
9220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009221 }
9222 }
9223 }
9224 return(encoding);
9225}
9226
9227/**
9228 * xmlParseSDDecl:
9229 * @ctxt: an XML parser context
9230 *
9231 * parse the XML standalone declaration
9232 *
9233 * [32] SDDecl ::= S 'standalone' Eq
9234 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9235 *
9236 * [ VC: Standalone Document Declaration ]
9237 * TODO The standalone document declaration must have the value "no"
9238 * if any external markup declarations contain declarations of:
9239 * - attributes with default values, if elements to which these
9240 * attributes apply appear in the document without specifications
9241 * of values for these attributes, or
9242 * - entities (other than amp, lt, gt, apos, quot), if references
9243 * to those entities appear in the document, or
9244 * - attributes with values subject to normalization, where the
9245 * attribute appears in the document with a value which will change
9246 * as a result of normalization, or
9247 * - element types with element content, if white space occurs directly
9248 * within any instance of those types.
9249 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009250 * Returns:
9251 * 1 if standalone="yes"
9252 * 0 if standalone="no"
9253 * -2 if standalone attribute is missing or invalid
9254 * (A standalone value of -2 means that the XML declaration was found,
9255 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009256 */
9257
9258int
9259xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009260 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009261
9262 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009263 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009264 SKIP(10);
9265 SKIP_BLANKS;
9266 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009267 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009268 return(standalone);
9269 }
9270 NEXT;
9271 SKIP_BLANKS;
9272 if (RAW == '\''){
9273 NEXT;
9274 if ((RAW == 'n') && (NXT(1) == 'o')) {
9275 standalone = 0;
9276 SKIP(2);
9277 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9278 (NXT(2) == 's')) {
9279 standalone = 1;
9280 SKIP(3);
9281 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009282 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009283 }
9284 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009285 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009286 } else
9287 NEXT;
9288 } else if (RAW == '"'){
9289 NEXT;
9290 if ((RAW == 'n') && (NXT(1) == 'o')) {
9291 standalone = 0;
9292 SKIP(2);
9293 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9294 (NXT(2) == 's')) {
9295 standalone = 1;
9296 SKIP(3);
9297 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009298 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009299 }
9300 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009301 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009302 } else
9303 NEXT;
9304 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009305 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009306 }
9307 }
9308 return(standalone);
9309}
9310
9311/**
9312 * xmlParseXMLDecl:
9313 * @ctxt: an XML parser context
9314 *
9315 * parse an XML declaration header
9316 *
9317 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9318 */
9319
9320void
9321xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9322 xmlChar *version;
9323
9324 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009325 * This value for standalone indicates that the document has an
9326 * XML declaration but it does not have a standalone attribute.
9327 * It will be overwritten later if a standalone attribute is found.
9328 */
9329 ctxt->input->standalone = -2;
9330
9331 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009332 * We know that '<?xml' is here.
9333 */
9334 SKIP(5);
9335
William M. Brack76e95df2003-10-18 16:20:14 +00009336 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009337 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9338 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009339 }
9340 SKIP_BLANKS;
9341
9342 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009343 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009344 */
9345 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009346 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009347 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009348 } else {
9349 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9350 /*
9351 * TODO: Blueberry should be detected here
9352 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009353 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9354 "Unsupported version '%s'\n",
9355 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009356 }
9357 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009358 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009359 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009360 }
Owen Taylor3473f882001-02-23 17:55:21 +00009361
9362 /*
9363 * We may have the encoding declaration
9364 */
William M. Brack76e95df2003-10-18 16:20:14 +00009365 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009366 if ((RAW == '?') && (NXT(1) == '>')) {
9367 SKIP(2);
9368 return;
9369 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009370 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009371 }
9372 xmlParseEncodingDecl(ctxt);
9373 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9374 /*
9375 * The XML REC instructs us to stop parsing right here
9376 */
9377 return;
9378 }
9379
9380 /*
9381 * We may have the standalone status.
9382 */
William M. Brack76e95df2003-10-18 16:20:14 +00009383 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009384 if ((RAW == '?') && (NXT(1) == '>')) {
9385 SKIP(2);
9386 return;
9387 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009389 }
9390 SKIP_BLANKS;
9391 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9392
9393 SKIP_BLANKS;
9394 if ((RAW == '?') && (NXT(1) == '>')) {
9395 SKIP(2);
9396 } else if (RAW == '>') {
9397 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009398 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009399 NEXT;
9400 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009401 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009402 MOVETO_ENDTAG(CUR_PTR);
9403 NEXT;
9404 }
9405}
9406
9407/**
9408 * xmlParseMisc:
9409 * @ctxt: an XML parser context
9410 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009411 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009412 *
9413 * [27] Misc ::= Comment | PI | S
9414 */
9415
9416void
9417xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009418 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009419 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009420 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009421 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009422 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009423 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009424 NEXT;
9425 } else
9426 xmlParseComment(ctxt);
9427 }
9428}
9429
9430/**
9431 * xmlParseDocument:
9432 * @ctxt: an XML parser context
9433 *
9434 * parse an XML document (and build a tree if using the standard SAX
9435 * interface).
9436 *
9437 * [1] document ::= prolog element Misc*
9438 *
9439 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9440 *
9441 * Returns 0, -1 in case of error. the parser context is augmented
9442 * as a result of the parsing.
9443 */
9444
9445int
9446xmlParseDocument(xmlParserCtxtPtr ctxt) {
9447 xmlChar start[4];
9448 xmlCharEncoding enc;
9449
9450 xmlInitParser();
9451
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009452 if ((ctxt == NULL) || (ctxt->input == NULL))
9453 return(-1);
9454
Owen Taylor3473f882001-02-23 17:55:21 +00009455 GROW;
9456
9457 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009458 * SAX: detecting the level.
9459 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009460 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009461
9462 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009463 * SAX: beginning of the document processing.
9464 */
9465 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9466 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9467
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009468 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9469 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009470 /*
9471 * Get the 4 first bytes and decode the charset
9472 * if enc != XML_CHAR_ENCODING_NONE
9473 * plug some encoding conversion routines.
9474 */
9475 start[0] = RAW;
9476 start[1] = NXT(1);
9477 start[2] = NXT(2);
9478 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009479 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009480 if (enc != XML_CHAR_ENCODING_NONE) {
9481 xmlSwitchEncoding(ctxt, enc);
9482 }
Owen Taylor3473f882001-02-23 17:55:21 +00009483 }
9484
9485
9486 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009487 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009488 }
9489
9490 /*
9491 * Check for the XMLDecl in the Prolog.
9492 */
9493 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009494 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009495
9496 /*
9497 * Note that we will switch encoding on the fly.
9498 */
9499 xmlParseXMLDecl(ctxt);
9500 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9501 /*
9502 * The XML REC instructs us to stop parsing right here
9503 */
9504 return(-1);
9505 }
9506 ctxt->standalone = ctxt->input->standalone;
9507 SKIP_BLANKS;
9508 } else {
9509 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9510 }
9511 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9512 ctxt->sax->startDocument(ctxt->userData);
9513
9514 /*
9515 * The Misc part of the Prolog
9516 */
9517 GROW;
9518 xmlParseMisc(ctxt);
9519
9520 /*
9521 * Then possibly doc type declaration(s) and more Misc
9522 * (doctypedecl Misc*)?
9523 */
9524 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009525 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009526
9527 ctxt->inSubset = 1;
9528 xmlParseDocTypeDecl(ctxt);
9529 if (RAW == '[') {
9530 ctxt->instate = XML_PARSER_DTD;
9531 xmlParseInternalSubset(ctxt);
9532 }
9533
9534 /*
9535 * Create and update the external subset.
9536 */
9537 ctxt->inSubset = 2;
9538 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9539 (!ctxt->disableSAX))
9540 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9541 ctxt->extSubSystem, ctxt->extSubURI);
9542 ctxt->inSubset = 0;
9543
Daniel Veillardac4118d2008-01-11 05:27:32 +00009544 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009545
9546 ctxt->instate = XML_PARSER_PROLOG;
9547 xmlParseMisc(ctxt);
9548 }
9549
9550 /*
9551 * Time to start parsing the tree itself
9552 */
9553 GROW;
9554 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009555 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9556 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009557 } else {
9558 ctxt->instate = XML_PARSER_CONTENT;
9559 xmlParseElement(ctxt);
9560 ctxt->instate = XML_PARSER_EPILOG;
9561
9562
9563 /*
9564 * The Misc part at the end
9565 */
9566 xmlParseMisc(ctxt);
9567
Daniel Veillard561b7f82002-03-20 21:55:57 +00009568 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009569 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009570 }
9571 ctxt->instate = XML_PARSER_EOF;
9572 }
9573
9574 /*
9575 * SAX: end of the document processing.
9576 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009577 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009578 ctxt->sax->endDocument(ctxt->userData);
9579
Daniel Veillard5997aca2002-03-18 18:36:20 +00009580 /*
9581 * Remove locally kept entity definitions if the tree was not built
9582 */
9583 if ((ctxt->myDoc != NULL) &&
9584 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9585 xmlFreeDoc(ctxt->myDoc);
9586 ctxt->myDoc = NULL;
9587 }
9588
Daniel Veillardc7612992002-02-17 22:47:37 +00009589 if (! ctxt->wellFormed) {
9590 ctxt->valid = 0;
9591 return(-1);
9592 }
Owen Taylor3473f882001-02-23 17:55:21 +00009593 return(0);
9594}
9595
9596/**
9597 * xmlParseExtParsedEnt:
9598 * @ctxt: an XML parser context
9599 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009600 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009601 * An external general parsed entity is well-formed if it matches the
9602 * production labeled extParsedEnt.
9603 *
9604 * [78] extParsedEnt ::= TextDecl? content
9605 *
9606 * Returns 0, -1 in case of error. the parser context is augmented
9607 * as a result of the parsing.
9608 */
9609
9610int
9611xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9612 xmlChar start[4];
9613 xmlCharEncoding enc;
9614
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009615 if ((ctxt == NULL) || (ctxt->input == NULL))
9616 return(-1);
9617
Owen Taylor3473f882001-02-23 17:55:21 +00009618 xmlDefaultSAXHandlerInit();
9619
Daniel Veillard309f81d2003-09-23 09:02:53 +00009620 xmlDetectSAX2(ctxt);
9621
Owen Taylor3473f882001-02-23 17:55:21 +00009622 GROW;
9623
9624 /*
9625 * SAX: beginning of the document processing.
9626 */
9627 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9628 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9629
9630 /*
9631 * Get the 4 first bytes and decode the charset
9632 * if enc != XML_CHAR_ENCODING_NONE
9633 * plug some encoding conversion routines.
9634 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009635 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9636 start[0] = RAW;
9637 start[1] = NXT(1);
9638 start[2] = NXT(2);
9639 start[3] = NXT(3);
9640 enc = xmlDetectCharEncoding(start, 4);
9641 if (enc != XML_CHAR_ENCODING_NONE) {
9642 xmlSwitchEncoding(ctxt, enc);
9643 }
Owen Taylor3473f882001-02-23 17:55:21 +00009644 }
9645
9646
9647 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009648 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009649 }
9650
9651 /*
9652 * Check for the XMLDecl in the Prolog.
9653 */
9654 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009655 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009656
9657 /*
9658 * Note that we will switch encoding on the fly.
9659 */
9660 xmlParseXMLDecl(ctxt);
9661 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9662 /*
9663 * The XML REC instructs us to stop parsing right here
9664 */
9665 return(-1);
9666 }
9667 SKIP_BLANKS;
9668 } else {
9669 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9670 }
9671 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9672 ctxt->sax->startDocument(ctxt->userData);
9673
9674 /*
9675 * Doing validity checking on chunk doesn't make sense
9676 */
9677 ctxt->instate = XML_PARSER_CONTENT;
9678 ctxt->validate = 0;
9679 ctxt->loadsubset = 0;
9680 ctxt->depth = 0;
9681
9682 xmlParseContent(ctxt);
9683
9684 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009685 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009686 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009687 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009688 }
9689
9690 /*
9691 * SAX: end of the document processing.
9692 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009693 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009694 ctxt->sax->endDocument(ctxt->userData);
9695
9696 if (! ctxt->wellFormed) return(-1);
9697 return(0);
9698}
9699
Daniel Veillard73b013f2003-09-30 12:36:01 +00009700#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009701/************************************************************************
9702 * *
9703 * Progressive parsing interfaces *
9704 * *
9705 ************************************************************************/
9706
9707/**
9708 * xmlParseLookupSequence:
9709 * @ctxt: an XML parser context
9710 * @first: the first char to lookup
9711 * @next: the next char to lookup or zero
9712 * @third: the next char to lookup or zero
9713 *
9714 * Try to find if a sequence (first, next, third) or just (first next) or
9715 * (first) is available in the input stream.
9716 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9717 * to avoid rescanning sequences of bytes, it DOES change the state of the
9718 * parser, do not use liberally.
9719 *
9720 * Returns the index to the current parsing point if the full sequence
9721 * is available, -1 otherwise.
9722 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009723static int
Owen Taylor3473f882001-02-23 17:55:21 +00009724xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9725 xmlChar next, xmlChar third) {
9726 int base, len;
9727 xmlParserInputPtr in;
9728 const xmlChar *buf;
9729
9730 in = ctxt->input;
9731 if (in == NULL) return(-1);
9732 base = in->cur - in->base;
9733 if (base < 0) return(-1);
9734 if (ctxt->checkIndex > base)
9735 base = ctxt->checkIndex;
9736 if (in->buf == NULL) {
9737 buf = in->base;
9738 len = in->length;
9739 } else {
9740 buf = in->buf->buffer->content;
9741 len = in->buf->buffer->use;
9742 }
9743 /* take into account the sequence length */
9744 if (third) len -= 2;
9745 else if (next) len --;
9746 for (;base < len;base++) {
9747 if (buf[base] == first) {
9748 if (third != 0) {
9749 if ((buf[base + 1] != next) ||
9750 (buf[base + 2] != third)) continue;
9751 } else if (next != 0) {
9752 if (buf[base + 1] != next) continue;
9753 }
9754 ctxt->checkIndex = 0;
9755#ifdef DEBUG_PUSH
9756 if (next == 0)
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: lookup '%c' found at %d\n",
9759 first, base);
9760 else if (third == 0)
9761 xmlGenericError(xmlGenericErrorContext,
9762 "PP: lookup '%c%c' found at %d\n",
9763 first, next, base);
9764 else
9765 xmlGenericError(xmlGenericErrorContext,
9766 "PP: lookup '%c%c%c' found at %d\n",
9767 first, next, third, base);
9768#endif
9769 return(base - (in->cur - in->base));
9770 }
9771 }
9772 ctxt->checkIndex = base;
9773#ifdef DEBUG_PUSH
9774 if (next == 0)
9775 xmlGenericError(xmlGenericErrorContext,
9776 "PP: lookup '%c' failed\n", first);
9777 else if (third == 0)
9778 xmlGenericError(xmlGenericErrorContext,
9779 "PP: lookup '%c%c' failed\n", first, next);
9780 else
9781 xmlGenericError(xmlGenericErrorContext,
9782 "PP: lookup '%c%c%c' failed\n", first, next, third);
9783#endif
9784 return(-1);
9785}
9786
9787/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009788 * xmlParseGetLasts:
9789 * @ctxt: an XML parser context
9790 * @lastlt: pointer to store the last '<' from the input
9791 * @lastgt: pointer to store the last '>' from the input
9792 *
9793 * Lookup the last < and > in the current chunk
9794 */
9795static void
9796xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9797 const xmlChar **lastgt) {
9798 const xmlChar *tmp;
9799
9800 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9801 xmlGenericError(xmlGenericErrorContext,
9802 "Internal error: xmlParseGetLasts\n");
9803 return;
9804 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009805 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 tmp = ctxt->input->end;
9807 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009808 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 if (tmp < ctxt->input->base) {
9810 *lastlt = NULL;
9811 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009812 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009813 *lastlt = tmp;
9814 tmp++;
9815 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9816 if (*tmp == '\'') {
9817 tmp++;
9818 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9819 if (tmp < ctxt->input->end) tmp++;
9820 } else if (*tmp == '"') {
9821 tmp++;
9822 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9823 if (tmp < ctxt->input->end) tmp++;
9824 } else
9825 tmp++;
9826 }
9827 if (tmp < ctxt->input->end)
9828 *lastgt = tmp;
9829 else {
9830 tmp = *lastlt;
9831 tmp--;
9832 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9833 if (tmp >= ctxt->input->base)
9834 *lastgt = tmp;
9835 else
9836 *lastgt = NULL;
9837 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009839 } else {
9840 *lastlt = NULL;
9841 *lastgt = NULL;
9842 }
9843}
9844/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009845 * xmlCheckCdataPush:
9846 * @cur: pointer to the bock of characters
9847 * @len: length of the block in bytes
9848 *
9849 * Check that the block of characters is okay as SCdata content [20]
9850 *
9851 * Returns the number of bytes to pass if okay, a negative index where an
9852 * UTF-8 error occured otherwise
9853 */
9854static int
9855xmlCheckCdataPush(const xmlChar *utf, int len) {
9856 int ix;
9857 unsigned char c;
9858 int codepoint;
9859
9860 if ((utf == NULL) || (len <= 0))
9861 return(0);
9862
9863 for (ix = 0; ix < len;) { /* string is 0-terminated */
9864 c = utf[ix];
9865 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9866 if (c >= 0x20)
9867 ix++;
9868 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9869 ix++;
9870 else
9871 return(-ix);
9872 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9873 if (ix + 2 > len) return(ix);
9874 if ((utf[ix+1] & 0xc0 ) != 0x80)
9875 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009876 codepoint = (utf[ix] & 0x1f) << 6;
9877 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009878 if (!xmlIsCharQ(codepoint))
9879 return(-ix);
9880 ix += 2;
9881 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9882 if (ix + 3 > len) return(ix);
9883 if (((utf[ix+1] & 0xc0) != 0x80) ||
9884 ((utf[ix+2] & 0xc0) != 0x80))
9885 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009886 codepoint = (utf[ix] & 0xf) << 12;
9887 codepoint |= (utf[ix+1] & 0x3f) << 6;
9888 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009889 if (!xmlIsCharQ(codepoint))
9890 return(-ix);
9891 ix += 3;
9892 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9893 if (ix + 4 > len) return(ix);
9894 if (((utf[ix+1] & 0xc0) != 0x80) ||
9895 ((utf[ix+2] & 0xc0) != 0x80) ||
9896 ((utf[ix+3] & 0xc0) != 0x80))
9897 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009898 codepoint = (utf[ix] & 0x7) << 18;
9899 codepoint |= (utf[ix+1] & 0x3f) << 12;
9900 codepoint |= (utf[ix+2] & 0x3f) << 6;
9901 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009902 if (!xmlIsCharQ(codepoint))
9903 return(-ix);
9904 ix += 4;
9905 } else /* unknown encoding */
9906 return(-ix);
9907 }
9908 return(ix);
9909}
9910
9911/**
Owen Taylor3473f882001-02-23 17:55:21 +00009912 * xmlParseTryOrFinish:
9913 * @ctxt: an XML parser context
9914 * @terminate: last chunk indicator
9915 *
9916 * Try to progress on parsing
9917 *
9918 * Returns zero if no parsing was possible
9919 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009920static int
Owen Taylor3473f882001-02-23 17:55:21 +00009921xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9922 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009923 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009924 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009925 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009926
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009927 if (ctxt->input == NULL)
9928 return(0);
9929
Owen Taylor3473f882001-02-23 17:55:21 +00009930#ifdef DEBUG_PUSH
9931 switch (ctxt->instate) {
9932 case XML_PARSER_EOF:
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: try EOF\n"); break;
9935 case XML_PARSER_START:
9936 xmlGenericError(xmlGenericErrorContext,
9937 "PP: try START\n"); break;
9938 case XML_PARSER_MISC:
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: try MISC\n");break;
9941 case XML_PARSER_COMMENT:
9942 xmlGenericError(xmlGenericErrorContext,
9943 "PP: try COMMENT\n");break;
9944 case XML_PARSER_PROLOG:
9945 xmlGenericError(xmlGenericErrorContext,
9946 "PP: try PROLOG\n");break;
9947 case XML_PARSER_START_TAG:
9948 xmlGenericError(xmlGenericErrorContext,
9949 "PP: try START_TAG\n");break;
9950 case XML_PARSER_CONTENT:
9951 xmlGenericError(xmlGenericErrorContext,
9952 "PP: try CONTENT\n");break;
9953 case XML_PARSER_CDATA_SECTION:
9954 xmlGenericError(xmlGenericErrorContext,
9955 "PP: try CDATA_SECTION\n");break;
9956 case XML_PARSER_END_TAG:
9957 xmlGenericError(xmlGenericErrorContext,
9958 "PP: try END_TAG\n");break;
9959 case XML_PARSER_ENTITY_DECL:
9960 xmlGenericError(xmlGenericErrorContext,
9961 "PP: try ENTITY_DECL\n");break;
9962 case XML_PARSER_ENTITY_VALUE:
9963 xmlGenericError(xmlGenericErrorContext,
9964 "PP: try ENTITY_VALUE\n");break;
9965 case XML_PARSER_ATTRIBUTE_VALUE:
9966 xmlGenericError(xmlGenericErrorContext,
9967 "PP: try ATTRIBUTE_VALUE\n");break;
9968 case XML_PARSER_DTD:
9969 xmlGenericError(xmlGenericErrorContext,
9970 "PP: try DTD\n");break;
9971 case XML_PARSER_EPILOG:
9972 xmlGenericError(xmlGenericErrorContext,
9973 "PP: try EPILOG\n");break;
9974 case XML_PARSER_PI:
9975 xmlGenericError(xmlGenericErrorContext,
9976 "PP: try PI\n");break;
9977 case XML_PARSER_IGNORE:
9978 xmlGenericError(xmlGenericErrorContext,
9979 "PP: try IGNORE\n");break;
9980 }
9981#endif
9982
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009983 if ((ctxt->input != NULL) &&
9984 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009985 xmlSHRINK(ctxt);
9986 ctxt->checkIndex = 0;
9987 }
9988 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009989
Daniel Veillarda880b122003-04-21 21:36:41 +00009990 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009991 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009992 return(0);
9993
9994
Owen Taylor3473f882001-02-23 17:55:21 +00009995 /*
9996 * Pop-up of finished entities.
9997 */
9998 while ((RAW == 0) && (ctxt->inputNr > 1))
9999 xmlPopInput(ctxt);
10000
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010001 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010002 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010003 avail = ctxt->input->length -
10004 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010005 else {
10006 /*
10007 * If we are operating on converted input, try to flush
10008 * remaining chars to avoid them stalling in the non-converted
10009 * buffer.
10010 */
10011 if ((ctxt->input->buf->raw != NULL) &&
10012 (ctxt->input->buf->raw->use > 0)) {
10013 int base = ctxt->input->base -
10014 ctxt->input->buf->buffer->content;
10015 int current = ctxt->input->cur - ctxt->input->base;
10016
10017 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10018 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10019 ctxt->input->cur = ctxt->input->base + current;
10020 ctxt->input->end =
10021 &ctxt->input->buf->buffer->content[
10022 ctxt->input->buf->buffer->use];
10023 }
10024 avail = ctxt->input->buf->buffer->use -
10025 (ctxt->input->cur - ctxt->input->base);
10026 }
Owen Taylor3473f882001-02-23 17:55:21 +000010027 if (avail < 1)
10028 goto done;
10029 switch (ctxt->instate) {
10030 case XML_PARSER_EOF:
10031 /*
10032 * Document parsing is done !
10033 */
10034 goto done;
10035 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010036 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10037 xmlChar start[4];
10038 xmlCharEncoding enc;
10039
10040 /*
10041 * Very first chars read from the document flow.
10042 */
10043 if (avail < 4)
10044 goto done;
10045
10046 /*
10047 * Get the 4 first bytes and decode the charset
10048 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010049 * plug some encoding conversion routines,
10050 * else xmlSwitchEncoding will set to (default)
10051 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010052 */
10053 start[0] = RAW;
10054 start[1] = NXT(1);
10055 start[2] = NXT(2);
10056 start[3] = NXT(3);
10057 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010058 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010059 break;
10060 }
Owen Taylor3473f882001-02-23 17:55:21 +000010061
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010062 if (avail < 2)
10063 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010064 cur = ctxt->input->cur[0];
10065 next = ctxt->input->cur[1];
10066 if (cur == 0) {
10067 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10068 ctxt->sax->setDocumentLocator(ctxt->userData,
10069 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010070 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010071 ctxt->instate = XML_PARSER_EOF;
10072#ifdef DEBUG_PUSH
10073 xmlGenericError(xmlGenericErrorContext,
10074 "PP: entering EOF\n");
10075#endif
10076 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10077 ctxt->sax->endDocument(ctxt->userData);
10078 goto done;
10079 }
10080 if ((cur == '<') && (next == '?')) {
10081 /* PI or XML decl */
10082 if (avail < 5) return(ret);
10083 if ((!terminate) &&
10084 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10085 return(ret);
10086 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10087 ctxt->sax->setDocumentLocator(ctxt->userData,
10088 &xmlDefaultSAXLocator);
10089 if ((ctxt->input->cur[2] == 'x') &&
10090 (ctxt->input->cur[3] == 'm') &&
10091 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010092 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010093 ret += 5;
10094#ifdef DEBUG_PUSH
10095 xmlGenericError(xmlGenericErrorContext,
10096 "PP: Parsing XML Decl\n");
10097#endif
10098 xmlParseXMLDecl(ctxt);
10099 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10100 /*
10101 * The XML REC instructs us to stop parsing right
10102 * here
10103 */
10104 ctxt->instate = XML_PARSER_EOF;
10105 return(0);
10106 }
10107 ctxt->standalone = ctxt->input->standalone;
10108 if ((ctxt->encoding == NULL) &&
10109 (ctxt->input->encoding != NULL))
10110 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10111 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10112 (!ctxt->disableSAX))
10113 ctxt->sax->startDocument(ctxt->userData);
10114 ctxt->instate = XML_PARSER_MISC;
10115#ifdef DEBUG_PUSH
10116 xmlGenericError(xmlGenericErrorContext,
10117 "PP: entering MISC\n");
10118#endif
10119 } else {
10120 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10121 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10122 (!ctxt->disableSAX))
10123 ctxt->sax->startDocument(ctxt->userData);
10124 ctxt->instate = XML_PARSER_MISC;
10125#ifdef DEBUG_PUSH
10126 xmlGenericError(xmlGenericErrorContext,
10127 "PP: entering MISC\n");
10128#endif
10129 }
10130 } else {
10131 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10132 ctxt->sax->setDocumentLocator(ctxt->userData,
10133 &xmlDefaultSAXLocator);
10134 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010135 if (ctxt->version == NULL) {
10136 xmlErrMemory(ctxt, NULL);
10137 break;
10138 }
Owen Taylor3473f882001-02-23 17:55:21 +000010139 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10140 (!ctxt->disableSAX))
10141 ctxt->sax->startDocument(ctxt->userData);
10142 ctxt->instate = XML_PARSER_MISC;
10143#ifdef DEBUG_PUSH
10144 xmlGenericError(xmlGenericErrorContext,
10145 "PP: entering MISC\n");
10146#endif
10147 }
10148 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010149 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010150 const xmlChar *name;
10151 const xmlChar *prefix;
10152 const xmlChar *URI;
10153 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010154
10155 if ((avail < 2) && (ctxt->inputNr == 1))
10156 goto done;
10157 cur = ctxt->input->cur[0];
10158 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010159 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010160 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010161 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10162 ctxt->sax->endDocument(ctxt->userData);
10163 goto done;
10164 }
10165 if (!terminate) {
10166 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010167 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010168 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010169 goto done;
10170 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10171 goto done;
10172 }
10173 }
10174 if (ctxt->spaceNr == 0)
10175 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010176 else if (*ctxt->space == -2)
10177 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010178 else
10179 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010180#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010181 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010182#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010183 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010184#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010185 else
10186 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010187#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010188 if (name == NULL) {
10189 spacePop(ctxt);
10190 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010191 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10192 ctxt->sax->endDocument(ctxt->userData);
10193 goto done;
10194 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010195#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010196 /*
10197 * [ VC: Root Element Type ]
10198 * The Name in the document type declaration must match
10199 * the element type of the root element.
10200 */
10201 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10202 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10203 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010204#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010205
10206 /*
10207 * Check for an Empty Element.
10208 */
10209 if ((RAW == '/') && (NXT(1) == '>')) {
10210 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010211
10212 if (ctxt->sax2) {
10213 if ((ctxt->sax != NULL) &&
10214 (ctxt->sax->endElementNs != NULL) &&
10215 (!ctxt->disableSAX))
10216 ctxt->sax->endElementNs(ctxt->userData, name,
10217 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010218 if (ctxt->nsNr - nsNr > 0)
10219 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010220#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010221 } else {
10222 if ((ctxt->sax != NULL) &&
10223 (ctxt->sax->endElement != NULL) &&
10224 (!ctxt->disableSAX))
10225 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010226#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010227 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010228 spacePop(ctxt);
10229 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010230 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010231 } else {
10232 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010233 }
10234 break;
10235 }
10236 if (RAW == '>') {
10237 NEXT;
10238 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010239 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010240 "Couldn't find end of Start Tag %s\n",
10241 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010242 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010243 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010244 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010245 if (ctxt->sax2)
10246 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010247#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010248 else
10249 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010250#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010251
Daniel Veillarda880b122003-04-21 21:36:41 +000010252 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010253 break;
10254 }
10255 case XML_PARSER_CONTENT: {
10256 const xmlChar *test;
10257 unsigned int cons;
10258 if ((avail < 2) && (ctxt->inputNr == 1))
10259 goto done;
10260 cur = ctxt->input->cur[0];
10261 next = ctxt->input->cur[1];
10262
10263 test = CUR_PTR;
10264 cons = ctxt->input->consumed;
10265 if ((cur == '<') && (next == '/')) {
10266 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010267 break;
10268 } else if ((cur == '<') && (next == '?')) {
10269 if ((!terminate) &&
10270 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10271 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010272 xmlParsePI(ctxt);
10273 } else if ((cur == '<') && (next != '!')) {
10274 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010275 break;
10276 } else if ((cur == '<') && (next == '!') &&
10277 (ctxt->input->cur[2] == '-') &&
10278 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010279 int term;
10280
10281 if (avail < 4)
10282 goto done;
10283 ctxt->input->cur += 4;
10284 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10285 ctxt->input->cur -= 4;
10286 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010287 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010288 xmlParseComment(ctxt);
10289 ctxt->instate = XML_PARSER_CONTENT;
10290 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10291 (ctxt->input->cur[2] == '[') &&
10292 (ctxt->input->cur[3] == 'C') &&
10293 (ctxt->input->cur[4] == 'D') &&
10294 (ctxt->input->cur[5] == 'A') &&
10295 (ctxt->input->cur[6] == 'T') &&
10296 (ctxt->input->cur[7] == 'A') &&
10297 (ctxt->input->cur[8] == '[')) {
10298 SKIP(9);
10299 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010300 break;
10301 } else if ((cur == '<') && (next == '!') &&
10302 (avail < 9)) {
10303 goto done;
10304 } else if (cur == '&') {
10305 if ((!terminate) &&
10306 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10307 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010308 xmlParseReference(ctxt);
10309 } else {
10310 /* TODO Avoid the extra copy, handle directly !!! */
10311 /*
10312 * Goal of the following test is:
10313 * - minimize calls to the SAX 'character' callback
10314 * when they are mergeable
10315 * - handle a problem for isBlank when we only parse
10316 * a sequence of blank chars and the next one is
10317 * not available to check against '<' presence.
10318 * - tries to homogenize the differences in SAX
10319 * callbacks between the push and pull versions
10320 * of the parser.
10321 */
10322 if ((ctxt->inputNr == 1) &&
10323 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10324 if (!terminate) {
10325 if (ctxt->progressive) {
10326 if ((lastlt == NULL) ||
10327 (ctxt->input->cur > lastlt))
10328 goto done;
10329 } else if (xmlParseLookupSequence(ctxt,
10330 '<', 0, 0) < 0) {
10331 goto done;
10332 }
10333 }
10334 }
10335 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010336 xmlParseCharData(ctxt, 0);
10337 }
10338 /*
10339 * Pop-up of finished entities.
10340 */
10341 while ((RAW == 0) && (ctxt->inputNr > 1))
10342 xmlPopInput(ctxt);
10343 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010344 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10345 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010346 ctxt->instate = XML_PARSER_EOF;
10347 break;
10348 }
10349 break;
10350 }
10351 case XML_PARSER_END_TAG:
10352 if (avail < 2)
10353 goto done;
10354 if (!terminate) {
10355 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010356 /* > can be found unescaped in attribute values */
10357 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010358 goto done;
10359 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10360 goto done;
10361 }
10362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010363 if (ctxt->sax2) {
10364 xmlParseEndTag2(ctxt,
10365 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10366 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010367 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010368 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010369 }
10370#ifdef LIBXML_SAX1_ENABLED
10371 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010372 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010373#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010374 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010375 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010376 } else {
10377 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010378 }
10379 break;
10380 case XML_PARSER_CDATA_SECTION: {
10381 /*
10382 * The Push mode need to have the SAX callback for
10383 * cdataBlock merge back contiguous callbacks.
10384 */
10385 int base;
10386
10387 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10388 if (base < 0) {
10389 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010390 int tmp;
10391
10392 tmp = xmlCheckCdataPush(ctxt->input->cur,
10393 XML_PARSER_BIG_BUFFER_SIZE);
10394 if (tmp < 0) {
10395 tmp = -tmp;
10396 ctxt->input->cur += tmp;
10397 goto encoding_error;
10398 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010399 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10400 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010401 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010402 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010403 else if (ctxt->sax->characters != NULL)
10404 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010405 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010406 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010407 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010408 ctxt->checkIndex = 0;
10409 }
10410 goto done;
10411 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010412 int tmp;
10413
10414 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10415 if ((tmp < 0) || (tmp != base)) {
10416 tmp = -tmp;
10417 ctxt->input->cur += tmp;
10418 goto encoding_error;
10419 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010420 if ((ctxt->sax != NULL) && (base == 0) &&
10421 (ctxt->sax->cdataBlock != NULL) &&
10422 (!ctxt->disableSAX)) {
10423 /*
10424 * Special case to provide identical behaviour
10425 * between pull and push parsers on enpty CDATA
10426 * sections
10427 */
10428 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10429 (!strncmp((const char *)&ctxt->input->cur[-9],
10430 "<![CDATA[", 9)))
10431 ctxt->sax->cdataBlock(ctxt->userData,
10432 BAD_CAST "", 0);
10433 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010434 (!ctxt->disableSAX)) {
10435 if (ctxt->sax->cdataBlock != NULL)
10436 ctxt->sax->cdataBlock(ctxt->userData,
10437 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010438 else if (ctxt->sax->characters != NULL)
10439 ctxt->sax->characters(ctxt->userData,
10440 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010441 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010442 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010443 ctxt->checkIndex = 0;
10444 ctxt->instate = XML_PARSER_CONTENT;
10445#ifdef DEBUG_PUSH
10446 xmlGenericError(xmlGenericErrorContext,
10447 "PP: entering CONTENT\n");
10448#endif
10449 }
10450 break;
10451 }
Owen Taylor3473f882001-02-23 17:55:21 +000010452 case XML_PARSER_MISC:
10453 SKIP_BLANKS;
10454 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010455 avail = ctxt->input->length -
10456 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010457 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010458 avail = ctxt->input->buf->buffer->use -
10459 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010460 if (avail < 2)
10461 goto done;
10462 cur = ctxt->input->cur[0];
10463 next = ctxt->input->cur[1];
10464 if ((cur == '<') && (next == '?')) {
10465 if ((!terminate) &&
10466 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10467 goto done;
10468#ifdef DEBUG_PUSH
10469 xmlGenericError(xmlGenericErrorContext,
10470 "PP: Parsing PI\n");
10471#endif
10472 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010473 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010474 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010475 (ctxt->input->cur[2] == '-') &&
10476 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010477 if ((!terminate) &&
10478 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10479 goto done;
10480#ifdef DEBUG_PUSH
10481 xmlGenericError(xmlGenericErrorContext,
10482 "PP: Parsing Comment\n");
10483#endif
10484 xmlParseComment(ctxt);
10485 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010486 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010487 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010488 (ctxt->input->cur[2] == 'D') &&
10489 (ctxt->input->cur[3] == 'O') &&
10490 (ctxt->input->cur[4] == 'C') &&
10491 (ctxt->input->cur[5] == 'T') &&
10492 (ctxt->input->cur[6] == 'Y') &&
10493 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010494 (ctxt->input->cur[8] == 'E')) {
10495 if ((!terminate) &&
10496 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10497 goto done;
10498#ifdef DEBUG_PUSH
10499 xmlGenericError(xmlGenericErrorContext,
10500 "PP: Parsing internal subset\n");
10501#endif
10502 ctxt->inSubset = 1;
10503 xmlParseDocTypeDecl(ctxt);
10504 if (RAW == '[') {
10505 ctxt->instate = XML_PARSER_DTD;
10506#ifdef DEBUG_PUSH
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: entering DTD\n");
10509#endif
10510 } else {
10511 /*
10512 * Create and update the external subset.
10513 */
10514 ctxt->inSubset = 2;
10515 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10516 (ctxt->sax->externalSubset != NULL))
10517 ctxt->sax->externalSubset(ctxt->userData,
10518 ctxt->intSubName, ctxt->extSubSystem,
10519 ctxt->extSubURI);
10520 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010521 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010522 ctxt->instate = XML_PARSER_PROLOG;
10523#ifdef DEBUG_PUSH
10524 xmlGenericError(xmlGenericErrorContext,
10525 "PP: entering PROLOG\n");
10526#endif
10527 }
10528 } else if ((cur == '<') && (next == '!') &&
10529 (avail < 9)) {
10530 goto done;
10531 } else {
10532 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010533 ctxt->progressive = 1;
10534 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010535#ifdef DEBUG_PUSH
10536 xmlGenericError(xmlGenericErrorContext,
10537 "PP: entering START_TAG\n");
10538#endif
10539 }
10540 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010541 case XML_PARSER_PROLOG:
10542 SKIP_BLANKS;
10543 if (ctxt->input->buf == NULL)
10544 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10545 else
10546 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10547 if (avail < 2)
10548 goto done;
10549 cur = ctxt->input->cur[0];
10550 next = ctxt->input->cur[1];
10551 if ((cur == '<') && (next == '?')) {
10552 if ((!terminate) &&
10553 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10554 goto done;
10555#ifdef DEBUG_PUSH
10556 xmlGenericError(xmlGenericErrorContext,
10557 "PP: Parsing PI\n");
10558#endif
10559 xmlParsePI(ctxt);
10560 } else if ((cur == '<') && (next == '!') &&
10561 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10562 if ((!terminate) &&
10563 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10564 goto done;
10565#ifdef DEBUG_PUSH
10566 xmlGenericError(xmlGenericErrorContext,
10567 "PP: Parsing Comment\n");
10568#endif
10569 xmlParseComment(ctxt);
10570 ctxt->instate = XML_PARSER_PROLOG;
10571 } else if ((cur == '<') && (next == '!') &&
10572 (avail < 4)) {
10573 goto done;
10574 } else {
10575 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010576 if (ctxt->progressive == 0)
10577 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010578 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010579#ifdef DEBUG_PUSH
10580 xmlGenericError(xmlGenericErrorContext,
10581 "PP: entering START_TAG\n");
10582#endif
10583 }
10584 break;
10585 case XML_PARSER_EPILOG:
10586 SKIP_BLANKS;
10587 if (ctxt->input->buf == NULL)
10588 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10589 else
10590 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10591 if (avail < 2)
10592 goto done;
10593 cur = ctxt->input->cur[0];
10594 next = ctxt->input->cur[1];
10595 if ((cur == '<') && (next == '?')) {
10596 if ((!terminate) &&
10597 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10598 goto done;
10599#ifdef DEBUG_PUSH
10600 xmlGenericError(xmlGenericErrorContext,
10601 "PP: Parsing PI\n");
10602#endif
10603 xmlParsePI(ctxt);
10604 ctxt->instate = XML_PARSER_EPILOG;
10605 } else if ((cur == '<') && (next == '!') &&
10606 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10607 if ((!terminate) &&
10608 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10609 goto done;
10610#ifdef DEBUG_PUSH
10611 xmlGenericError(xmlGenericErrorContext,
10612 "PP: Parsing Comment\n");
10613#endif
10614 xmlParseComment(ctxt);
10615 ctxt->instate = XML_PARSER_EPILOG;
10616 } else if ((cur == '<') && (next == '!') &&
10617 (avail < 4)) {
10618 goto done;
10619 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010620 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010621 ctxt->instate = XML_PARSER_EOF;
10622#ifdef DEBUG_PUSH
10623 xmlGenericError(xmlGenericErrorContext,
10624 "PP: entering EOF\n");
10625#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010626 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010627 ctxt->sax->endDocument(ctxt->userData);
10628 goto done;
10629 }
10630 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010631 case XML_PARSER_DTD: {
10632 /*
10633 * Sorry but progressive parsing of the internal subset
10634 * is not expected to be supported. We first check that
10635 * the full content of the internal subset is available and
10636 * the parsing is launched only at that point.
10637 * Internal subset ends up with "']' S? '>'" in an unescaped
10638 * section and not in a ']]>' sequence which are conditional
10639 * sections (whoever argued to keep that crap in XML deserve
10640 * a place in hell !).
10641 */
10642 int base, i;
10643 xmlChar *buf;
10644 xmlChar quote = 0;
10645
10646 base = ctxt->input->cur - ctxt->input->base;
10647 if (base < 0) return(0);
10648 if (ctxt->checkIndex > base)
10649 base = ctxt->checkIndex;
10650 buf = ctxt->input->buf->buffer->content;
10651 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10652 base++) {
10653 if (quote != 0) {
10654 if (buf[base] == quote)
10655 quote = 0;
10656 continue;
10657 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010658 if ((quote == 0) && (buf[base] == '<')) {
10659 int found = 0;
10660 /* special handling of comments */
10661 if (((unsigned int) base + 4 <
10662 ctxt->input->buf->buffer->use) &&
10663 (buf[base + 1] == '!') &&
10664 (buf[base + 2] == '-') &&
10665 (buf[base + 3] == '-')) {
10666 for (;(unsigned int) base + 3 <
10667 ctxt->input->buf->buffer->use; base++) {
10668 if ((buf[base] == '-') &&
10669 (buf[base + 1] == '-') &&
10670 (buf[base + 2] == '>')) {
10671 found = 1;
10672 base += 2;
10673 break;
10674 }
10675 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010676 if (!found) {
10677#if 0
10678 fprintf(stderr, "unfinished comment\n");
10679#endif
10680 break; /* for */
10681 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010682 continue;
10683 }
10684 }
Owen Taylor3473f882001-02-23 17:55:21 +000010685 if (buf[base] == '"') {
10686 quote = '"';
10687 continue;
10688 }
10689 if (buf[base] == '\'') {
10690 quote = '\'';
10691 continue;
10692 }
10693 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010694#if 0
10695 fprintf(stderr, "%c%c%c%c: ", buf[base],
10696 buf[base + 1], buf[base + 2], buf[base + 3]);
10697#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010698 if ((unsigned int) base +1 >=
10699 ctxt->input->buf->buffer->use)
10700 break;
10701 if (buf[base + 1] == ']') {
10702 /* conditional crap, skip both ']' ! */
10703 base++;
10704 continue;
10705 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010706 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010707 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10708 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010709 if (buf[base + i] == '>') {
10710#if 0
10711 fprintf(stderr, "found\n");
10712#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010713 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010714 }
10715 if (!IS_BLANK_CH(buf[base + i])) {
10716#if 0
10717 fprintf(stderr, "not found\n");
10718#endif
10719 goto not_end_of_int_subset;
10720 }
Owen Taylor3473f882001-02-23 17:55:21 +000010721 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010722#if 0
10723 fprintf(stderr, "end of stream\n");
10724#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010725 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010726
Owen Taylor3473f882001-02-23 17:55:21 +000010727 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010728not_end_of_int_subset:
10729 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010730 }
10731 /*
10732 * We didn't found the end of the Internal subset
10733 */
Owen Taylor3473f882001-02-23 17:55:21 +000010734#ifdef DEBUG_PUSH
10735 if (next == 0)
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: lookup of int subset end filed\n");
10738#endif
10739 goto done;
10740
10741found_end_int_subset:
10742 xmlParseInternalSubset(ctxt);
10743 ctxt->inSubset = 2;
10744 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10745 (ctxt->sax->externalSubset != NULL))
10746 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10747 ctxt->extSubSystem, ctxt->extSubURI);
10748 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010749 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010750 ctxt->instate = XML_PARSER_PROLOG;
10751 ctxt->checkIndex = 0;
10752#ifdef DEBUG_PUSH
10753 xmlGenericError(xmlGenericErrorContext,
10754 "PP: entering PROLOG\n");
10755#endif
10756 break;
10757 }
10758 case XML_PARSER_COMMENT:
10759 xmlGenericError(xmlGenericErrorContext,
10760 "PP: internal error, state == COMMENT\n");
10761 ctxt->instate = XML_PARSER_CONTENT;
10762#ifdef DEBUG_PUSH
10763 xmlGenericError(xmlGenericErrorContext,
10764 "PP: entering CONTENT\n");
10765#endif
10766 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010767 case XML_PARSER_IGNORE:
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: internal error, state == IGNORE");
10770 ctxt->instate = XML_PARSER_DTD;
10771#ifdef DEBUG_PUSH
10772 xmlGenericError(xmlGenericErrorContext,
10773 "PP: entering DTD\n");
10774#endif
10775 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010776 case XML_PARSER_PI:
10777 xmlGenericError(xmlGenericErrorContext,
10778 "PP: internal error, state == PI\n");
10779 ctxt->instate = XML_PARSER_CONTENT;
10780#ifdef DEBUG_PUSH
10781 xmlGenericError(xmlGenericErrorContext,
10782 "PP: entering CONTENT\n");
10783#endif
10784 break;
10785 case XML_PARSER_ENTITY_DECL:
10786 xmlGenericError(xmlGenericErrorContext,
10787 "PP: internal error, state == ENTITY_DECL\n");
10788 ctxt->instate = XML_PARSER_DTD;
10789#ifdef DEBUG_PUSH
10790 xmlGenericError(xmlGenericErrorContext,
10791 "PP: entering DTD\n");
10792#endif
10793 break;
10794 case XML_PARSER_ENTITY_VALUE:
10795 xmlGenericError(xmlGenericErrorContext,
10796 "PP: internal error, state == ENTITY_VALUE\n");
10797 ctxt->instate = XML_PARSER_CONTENT;
10798#ifdef DEBUG_PUSH
10799 xmlGenericError(xmlGenericErrorContext,
10800 "PP: entering DTD\n");
10801#endif
10802 break;
10803 case XML_PARSER_ATTRIBUTE_VALUE:
10804 xmlGenericError(xmlGenericErrorContext,
10805 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10806 ctxt->instate = XML_PARSER_START_TAG;
10807#ifdef DEBUG_PUSH
10808 xmlGenericError(xmlGenericErrorContext,
10809 "PP: entering START_TAG\n");
10810#endif
10811 break;
10812 case XML_PARSER_SYSTEM_LITERAL:
10813 xmlGenericError(xmlGenericErrorContext,
10814 "PP: internal error, state == SYSTEM_LITERAL\n");
10815 ctxt->instate = XML_PARSER_START_TAG;
10816#ifdef DEBUG_PUSH
10817 xmlGenericError(xmlGenericErrorContext,
10818 "PP: entering START_TAG\n");
10819#endif
10820 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010821 case XML_PARSER_PUBLIC_LITERAL:
10822 xmlGenericError(xmlGenericErrorContext,
10823 "PP: internal error, state == PUBLIC_LITERAL\n");
10824 ctxt->instate = XML_PARSER_START_TAG;
10825#ifdef DEBUG_PUSH
10826 xmlGenericError(xmlGenericErrorContext,
10827 "PP: entering START_TAG\n");
10828#endif
10829 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010830 }
10831 }
10832done:
10833#ifdef DEBUG_PUSH
10834 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10835#endif
10836 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010837encoding_error:
10838 {
10839 char buffer[150];
10840
10841 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10842 ctxt->input->cur[0], ctxt->input->cur[1],
10843 ctxt->input->cur[2], ctxt->input->cur[3]);
10844 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10845 "Input is not proper UTF-8, indicate encoding !\n%s",
10846 BAD_CAST buffer, NULL);
10847 }
10848 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010849}
10850
10851/**
Owen Taylor3473f882001-02-23 17:55:21 +000010852 * xmlParseChunk:
10853 * @ctxt: an XML parser context
10854 * @chunk: an char array
10855 * @size: the size in byte of the chunk
10856 * @terminate: last chunk indicator
10857 *
10858 * Parse a Chunk of memory
10859 *
10860 * Returns zero if no error, the xmlParserErrors otherwise.
10861 */
10862int
10863xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10864 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010865 int end_in_lf = 0;
10866
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010867 if (ctxt == NULL)
10868 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010869 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010870 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010871 if (ctxt->instate == XML_PARSER_START)
10872 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010873 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10874 (chunk[size - 1] == '\r')) {
10875 end_in_lf = 1;
10876 size--;
10877 }
Owen Taylor3473f882001-02-23 17:55:21 +000010878 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10879 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10880 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10881 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010882 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010883
William M. Bracka3215c72004-07-31 16:24:01 +000010884 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10885 if (res < 0) {
10886 ctxt->errNo = XML_PARSER_EOF;
10887 ctxt->disableSAX = 1;
10888 return (XML_PARSER_EOF);
10889 }
Owen Taylor3473f882001-02-23 17:55:21 +000010890 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10891 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010892 ctxt->input->end =
10893 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010894#ifdef DEBUG_PUSH
10895 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10896#endif
10897
Owen Taylor3473f882001-02-23 17:55:21 +000010898 } else if (ctxt->instate != XML_PARSER_EOF) {
10899 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10900 xmlParserInputBufferPtr in = ctxt->input->buf;
10901 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10902 (in->raw != NULL)) {
10903 int nbchars;
10904
10905 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10906 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010907 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010908 xmlGenericError(xmlGenericErrorContext,
10909 "xmlParseChunk: encoder error\n");
10910 return(XML_ERR_INVALID_ENCODING);
10911 }
10912 }
10913 }
10914 }
10915 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010916 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10917 (ctxt->input->buf != NULL)) {
10918 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10919 }
Daniel Veillard14412512005-01-21 23:53:26 +000010920 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010921 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010922 if (terminate) {
10923 /*
10924 * Check for termination
10925 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010926 int avail = 0;
10927
10928 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010929 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010930 avail = ctxt->input->length -
10931 (ctxt->input->cur - ctxt->input->base);
10932 else
10933 avail = ctxt->input->buf->buffer->use -
10934 (ctxt->input->cur - ctxt->input->base);
10935 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010936
Owen Taylor3473f882001-02-23 17:55:21 +000010937 if ((ctxt->instate != XML_PARSER_EOF) &&
10938 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010939 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010940 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010941 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010943 }
Owen Taylor3473f882001-02-23 17:55:21 +000010944 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010945 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010946 ctxt->sax->endDocument(ctxt->userData);
10947 }
10948 ctxt->instate = XML_PARSER_EOF;
10949 }
10950 return((xmlParserErrors) ctxt->errNo);
10951}
10952
10953/************************************************************************
10954 * *
10955 * I/O front end functions to the parser *
10956 * *
10957 ************************************************************************/
10958
10959/**
Owen Taylor3473f882001-02-23 17:55:21 +000010960 * xmlCreatePushParserCtxt:
10961 * @sax: a SAX handler
10962 * @user_data: The user data returned on SAX callbacks
10963 * @chunk: a pointer to an array of chars
10964 * @size: number of chars in the array
10965 * @filename: an optional file name or URI
10966 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010967 * Create a parser context for using the XML parser in push mode.
10968 * If @buffer and @size are non-NULL, the data is used to detect
10969 * the encoding. The remaining characters will be parsed so they
10970 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010971 * To allow content encoding detection, @size should be >= 4
10972 * The value of @filename is used for fetching external entities
10973 * and error/warning reports.
10974 *
10975 * Returns the new parser context or NULL
10976 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010977
Owen Taylor3473f882001-02-23 17:55:21 +000010978xmlParserCtxtPtr
10979xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10980 const char *chunk, int size, const char *filename) {
10981 xmlParserCtxtPtr ctxt;
10982 xmlParserInputPtr inputStream;
10983 xmlParserInputBufferPtr buf;
10984 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10985
10986 /*
10987 * plug some encoding conversion routines
10988 */
10989 if ((chunk != NULL) && (size >= 4))
10990 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10991
10992 buf = xmlAllocParserInputBuffer(enc);
10993 if (buf == NULL) return(NULL);
10994
10995 ctxt = xmlNewParserCtxt();
10996 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010997 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010998 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010999 return(NULL);
11000 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011001 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011002 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11003 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011004 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011005 xmlFreeParserInputBuffer(buf);
11006 xmlFreeParserCtxt(ctxt);
11007 return(NULL);
11008 }
Owen Taylor3473f882001-02-23 17:55:21 +000011009 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011010#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011011 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011012#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011013 xmlFree(ctxt->sax);
11014 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11015 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011016 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011017 xmlFreeParserInputBuffer(buf);
11018 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011019 return(NULL);
11020 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011021 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11022 if (sax->initialized == XML_SAX2_MAGIC)
11023 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11024 else
11025 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011026 if (user_data != NULL)
11027 ctxt->userData = user_data;
11028 }
11029 if (filename == NULL) {
11030 ctxt->directory = NULL;
11031 } else {
11032 ctxt->directory = xmlParserGetDirectory(filename);
11033 }
11034
11035 inputStream = xmlNewInputStream(ctxt);
11036 if (inputStream == NULL) {
11037 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011038 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011039 return(NULL);
11040 }
11041
11042 if (filename == NULL)
11043 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011044 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011045 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011046 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011047 if (inputStream->filename == NULL) {
11048 xmlFreeParserCtxt(ctxt);
11049 xmlFreeParserInputBuffer(buf);
11050 return(NULL);
11051 }
11052 }
Owen Taylor3473f882001-02-23 17:55:21 +000011053 inputStream->buf = buf;
11054 inputStream->base = inputStream->buf->buffer->content;
11055 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011056 inputStream->end =
11057 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011058
11059 inputPush(ctxt, inputStream);
11060
William M. Brack3a1cd212005-02-11 14:35:54 +000011061 /*
11062 * If the caller didn't provide an initial 'chunk' for determining
11063 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11064 * that it can be automatically determined later
11065 */
11066 if ((size == 0) || (chunk == NULL)) {
11067 ctxt->charset = XML_CHAR_ENCODING_NONE;
11068 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011069 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11070 int cur = ctxt->input->cur - ctxt->input->base;
11071
Owen Taylor3473f882001-02-23 17:55:21 +000011072 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011073
11074 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11075 ctxt->input->cur = ctxt->input->base + cur;
11076 ctxt->input->end =
11077 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011078#ifdef DEBUG_PUSH
11079 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11080#endif
11081 }
11082
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011083 if (enc != XML_CHAR_ENCODING_NONE) {
11084 xmlSwitchEncoding(ctxt, enc);
11085 }
11086
Owen Taylor3473f882001-02-23 17:55:21 +000011087 return(ctxt);
11088}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011089#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011090
11091/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011092 * xmlStopParser:
11093 * @ctxt: an XML parser context
11094 *
11095 * Blocks further parser processing
11096 */
11097void
11098xmlStopParser(xmlParserCtxtPtr ctxt) {
11099 if (ctxt == NULL)
11100 return;
11101 ctxt->instate = XML_PARSER_EOF;
11102 ctxt->disableSAX = 1;
11103 if (ctxt->input != NULL) {
11104 ctxt->input->cur = BAD_CAST"";
11105 ctxt->input->base = ctxt->input->cur;
11106 }
11107}
11108
11109/**
Owen Taylor3473f882001-02-23 17:55:21 +000011110 * xmlCreateIOParserCtxt:
11111 * @sax: a SAX handler
11112 * @user_data: The user data returned on SAX callbacks
11113 * @ioread: an I/O read function
11114 * @ioclose: an I/O close function
11115 * @ioctx: an I/O handler
11116 * @enc: the charset encoding if known
11117 *
11118 * Create a parser context for using the XML parser with an existing
11119 * I/O stream
11120 *
11121 * Returns the new parser context or NULL
11122 */
11123xmlParserCtxtPtr
11124xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11125 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11126 void *ioctx, xmlCharEncoding enc) {
11127 xmlParserCtxtPtr ctxt;
11128 xmlParserInputPtr inputStream;
11129 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011130
11131 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011132
11133 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11134 if (buf == NULL) return(NULL);
11135
11136 ctxt = xmlNewParserCtxt();
11137 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011138 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011139 return(NULL);
11140 }
11141 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011142#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011143 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011144#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011145 xmlFree(ctxt->sax);
11146 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11147 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011148 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011149 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011150 return(NULL);
11151 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011152 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11153 if (sax->initialized == XML_SAX2_MAGIC)
11154 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11155 else
11156 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011157 if (user_data != NULL)
11158 ctxt->userData = user_data;
11159 }
11160
11161 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11162 if (inputStream == NULL) {
11163 xmlFreeParserCtxt(ctxt);
11164 return(NULL);
11165 }
11166 inputPush(ctxt, inputStream);
11167
11168 return(ctxt);
11169}
11170
Daniel Veillard4432df22003-09-28 18:58:27 +000011171#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011172/************************************************************************
11173 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011174 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011175 * *
11176 ************************************************************************/
11177
11178/**
11179 * xmlIOParseDTD:
11180 * @sax: the SAX handler block or NULL
11181 * @input: an Input Buffer
11182 * @enc: the charset encoding if known
11183 *
11184 * Load and parse a DTD
11185 *
11186 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011187 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011188 */
11189
11190xmlDtdPtr
11191xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11192 xmlCharEncoding enc) {
11193 xmlDtdPtr ret = NULL;
11194 xmlParserCtxtPtr ctxt;
11195 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011196 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011197
11198 if (input == NULL)
11199 return(NULL);
11200
11201 ctxt = xmlNewParserCtxt();
11202 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011203 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011204 return(NULL);
11205 }
11206
11207 /*
11208 * Set-up the SAX context
11209 */
11210 if (sax != NULL) {
11211 if (ctxt->sax != NULL)
11212 xmlFree(ctxt->sax);
11213 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011214 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011215 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011216 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011217
11218 /*
11219 * generate a parser input from the I/O handler
11220 */
11221
Daniel Veillard43caefb2003-12-07 19:32:22 +000011222 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011223 if (pinput == NULL) {
11224 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011225 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011226 xmlFreeParserCtxt(ctxt);
11227 return(NULL);
11228 }
11229
11230 /*
11231 * plug some encoding conversion routines here.
11232 */
11233 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011234 if (enc != XML_CHAR_ENCODING_NONE) {
11235 xmlSwitchEncoding(ctxt, enc);
11236 }
Owen Taylor3473f882001-02-23 17:55:21 +000011237
11238 pinput->filename = NULL;
11239 pinput->line = 1;
11240 pinput->col = 1;
11241 pinput->base = ctxt->input->cur;
11242 pinput->cur = ctxt->input->cur;
11243 pinput->free = NULL;
11244
11245 /*
11246 * let's parse that entity knowing it's an external subset.
11247 */
11248 ctxt->inSubset = 2;
11249 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11250 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11251 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011252
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011253 if ((enc == XML_CHAR_ENCODING_NONE) &&
11254 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011255 /*
11256 * Get the 4 first bytes and decode the charset
11257 * if enc != XML_CHAR_ENCODING_NONE
11258 * plug some encoding conversion routines.
11259 */
11260 start[0] = RAW;
11261 start[1] = NXT(1);
11262 start[2] = NXT(2);
11263 start[3] = NXT(3);
11264 enc = xmlDetectCharEncoding(start, 4);
11265 if (enc != XML_CHAR_ENCODING_NONE) {
11266 xmlSwitchEncoding(ctxt, enc);
11267 }
11268 }
11269
Owen Taylor3473f882001-02-23 17:55:21 +000011270 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11271
11272 if (ctxt->myDoc != NULL) {
11273 if (ctxt->wellFormed) {
11274 ret = ctxt->myDoc->extSubset;
11275 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011276 if (ret != NULL) {
11277 xmlNodePtr tmp;
11278
11279 ret->doc = NULL;
11280 tmp = ret->children;
11281 while (tmp != NULL) {
11282 tmp->doc = NULL;
11283 tmp = tmp->next;
11284 }
11285 }
Owen Taylor3473f882001-02-23 17:55:21 +000011286 } else {
11287 ret = NULL;
11288 }
11289 xmlFreeDoc(ctxt->myDoc);
11290 ctxt->myDoc = NULL;
11291 }
11292 if (sax != NULL) ctxt->sax = NULL;
11293 xmlFreeParserCtxt(ctxt);
11294
11295 return(ret);
11296}
11297
11298/**
11299 * xmlSAXParseDTD:
11300 * @sax: the SAX handler block
11301 * @ExternalID: a NAME* containing the External ID of the DTD
11302 * @SystemID: a NAME* containing the URL to the DTD
11303 *
11304 * Load and parse an external subset.
11305 *
11306 * Returns the resulting xmlDtdPtr or NULL in case of error.
11307 */
11308
11309xmlDtdPtr
11310xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11311 const xmlChar *SystemID) {
11312 xmlDtdPtr ret = NULL;
11313 xmlParserCtxtPtr ctxt;
11314 xmlParserInputPtr input = NULL;
11315 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011316 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011317
11318 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11319
11320 ctxt = xmlNewParserCtxt();
11321 if (ctxt == NULL) {
11322 return(NULL);
11323 }
11324
11325 /*
11326 * Set-up the SAX context
11327 */
11328 if (sax != NULL) {
11329 if (ctxt->sax != NULL)
11330 xmlFree(ctxt->sax);
11331 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011332 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011333 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011334
11335 /*
11336 * Canonicalise the system ID
11337 */
11338 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011339 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011340 xmlFreeParserCtxt(ctxt);
11341 return(NULL);
11342 }
Owen Taylor3473f882001-02-23 17:55:21 +000011343
11344 /*
11345 * Ask the Entity resolver to load the damn thing
11346 */
11347
11348 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011349 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11350 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011351 if (input == NULL) {
11352 if (sax != NULL) ctxt->sax = NULL;
11353 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011354 if (systemIdCanonic != NULL)
11355 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011356 return(NULL);
11357 }
11358
11359 /*
11360 * plug some encoding conversion routines here.
11361 */
11362 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011363 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11364 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11365 xmlSwitchEncoding(ctxt, enc);
11366 }
Owen Taylor3473f882001-02-23 17:55:21 +000011367
11368 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011369 input->filename = (char *) systemIdCanonic;
11370 else
11371 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011372 input->line = 1;
11373 input->col = 1;
11374 input->base = ctxt->input->cur;
11375 input->cur = ctxt->input->cur;
11376 input->free = NULL;
11377
11378 /*
11379 * let's parse that entity knowing it's an external subset.
11380 */
11381 ctxt->inSubset = 2;
11382 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11383 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11384 ExternalID, SystemID);
11385 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11386
11387 if (ctxt->myDoc != NULL) {
11388 if (ctxt->wellFormed) {
11389 ret = ctxt->myDoc->extSubset;
11390 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011391 if (ret != NULL) {
11392 xmlNodePtr tmp;
11393
11394 ret->doc = NULL;
11395 tmp = ret->children;
11396 while (tmp != NULL) {
11397 tmp->doc = NULL;
11398 tmp = tmp->next;
11399 }
11400 }
Owen Taylor3473f882001-02-23 17:55:21 +000011401 } else {
11402 ret = NULL;
11403 }
11404 xmlFreeDoc(ctxt->myDoc);
11405 ctxt->myDoc = NULL;
11406 }
11407 if (sax != NULL) ctxt->sax = NULL;
11408 xmlFreeParserCtxt(ctxt);
11409
11410 return(ret);
11411}
11412
Daniel Veillard4432df22003-09-28 18:58:27 +000011413
Owen Taylor3473f882001-02-23 17:55:21 +000011414/**
11415 * xmlParseDTD:
11416 * @ExternalID: a NAME* containing the External ID of the DTD
11417 * @SystemID: a NAME* containing the URL to the DTD
11418 *
11419 * Load and parse an external subset.
11420 *
11421 * Returns the resulting xmlDtdPtr or NULL in case of error.
11422 */
11423
11424xmlDtdPtr
11425xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11426 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11427}
Daniel Veillard4432df22003-09-28 18:58:27 +000011428#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011429
11430/************************************************************************
11431 * *
11432 * Front ends when parsing an Entity *
11433 * *
11434 ************************************************************************/
11435
11436/**
Owen Taylor3473f882001-02-23 17:55:21 +000011437 * xmlParseCtxtExternalEntity:
11438 * @ctx: the existing parsing context
11439 * @URL: the URL for the entity to load
11440 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011441 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011442 *
11443 * Parse an external general entity within an existing parsing context
11444 * An external general parsed entity is well-formed if it matches the
11445 * production labeled extParsedEnt.
11446 *
11447 * [78] extParsedEnt ::= TextDecl? content
11448 *
11449 * Returns 0 if the entity is well formed, -1 in case of args problem and
11450 * the parser error code otherwise
11451 */
11452
11453int
11454xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011455 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011456 xmlParserCtxtPtr ctxt;
11457 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011458 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011459 xmlSAXHandlerPtr oldsax = NULL;
11460 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011461 xmlChar start[4];
11462 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011463 xmlParserInputPtr inputStream;
11464 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011465
Daniel Veillardce682bc2004-11-05 17:22:25 +000011466 if (ctx == NULL) return(-1);
11467
Owen Taylor3473f882001-02-23 17:55:21 +000011468 if (ctx->depth > 40) {
11469 return(XML_ERR_ENTITY_LOOP);
11470 }
11471
Daniel Veillardcda96922001-08-21 10:56:31 +000011472 if (lst != NULL)
11473 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011474 if ((URL == NULL) && (ID == NULL))
11475 return(-1);
11476 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11477 return(-1);
11478
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011479 ctxt = xmlNewParserCtxt();
11480 if (ctxt == NULL) {
11481 return(-1);
11482 }
11483
Owen Taylor3473f882001-02-23 17:55:21 +000011484 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011485 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011486
11487 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11488 if (inputStream == NULL) {
11489 xmlFreeParserCtxt(ctxt);
11490 return(-1);
11491 }
11492
11493 inputPush(ctxt, inputStream);
11494
11495 if ((ctxt->directory == NULL) && (directory == NULL))
11496 directory = xmlParserGetDirectory((char *)URL);
11497 if ((ctxt->directory == NULL) && (directory != NULL))
11498 ctxt->directory = directory;
11499
Owen Taylor3473f882001-02-23 17:55:21 +000011500 oldsax = ctxt->sax;
11501 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011502 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011503 newDoc = xmlNewDoc(BAD_CAST "1.0");
11504 if (newDoc == NULL) {
11505 xmlFreeParserCtxt(ctxt);
11506 return(-1);
11507 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011508 if (ctx->myDoc->dict) {
11509 newDoc->dict = ctx->myDoc->dict;
11510 xmlDictReference(newDoc->dict);
11511 }
Owen Taylor3473f882001-02-23 17:55:21 +000011512 if (ctx->myDoc != NULL) {
11513 newDoc->intSubset = ctx->myDoc->intSubset;
11514 newDoc->extSubset = ctx->myDoc->extSubset;
11515 }
11516 if (ctx->myDoc->URL != NULL) {
11517 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11518 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011519 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11520 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011521 ctxt->sax = oldsax;
11522 xmlFreeParserCtxt(ctxt);
11523 newDoc->intSubset = NULL;
11524 newDoc->extSubset = NULL;
11525 xmlFreeDoc(newDoc);
11526 return(-1);
11527 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011528 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011529 nodePush(ctxt, newDoc->children);
11530 if (ctx->myDoc == NULL) {
11531 ctxt->myDoc = newDoc;
11532 } else {
11533 ctxt->myDoc = ctx->myDoc;
11534 newDoc->children->doc = ctx->myDoc;
11535 }
11536
Daniel Veillard87a764e2001-06-20 17:41:10 +000011537 /*
11538 * Get the 4 first bytes and decode the charset
11539 * if enc != XML_CHAR_ENCODING_NONE
11540 * plug some encoding conversion routines.
11541 */
11542 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011543 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11544 start[0] = RAW;
11545 start[1] = NXT(1);
11546 start[2] = NXT(2);
11547 start[3] = NXT(3);
11548 enc = xmlDetectCharEncoding(start, 4);
11549 if (enc != XML_CHAR_ENCODING_NONE) {
11550 xmlSwitchEncoding(ctxt, enc);
11551 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011552 }
11553
Owen Taylor3473f882001-02-23 17:55:21 +000011554 /*
11555 * Parse a possible text declaration first
11556 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011557 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011558 xmlParseTextDecl(ctxt);
11559 }
11560
11561 /*
11562 * Doing validity checking on chunk doesn't make sense
11563 */
11564 ctxt->instate = XML_PARSER_CONTENT;
11565 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011566 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011567 ctxt->loadsubset = ctx->loadsubset;
11568 ctxt->depth = ctx->depth + 1;
11569 ctxt->replaceEntities = ctx->replaceEntities;
11570 if (ctxt->validate) {
11571 ctxt->vctxt.error = ctx->vctxt.error;
11572 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011573 } else {
11574 ctxt->vctxt.error = NULL;
11575 ctxt->vctxt.warning = NULL;
11576 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011577 ctxt->vctxt.nodeTab = NULL;
11578 ctxt->vctxt.nodeNr = 0;
11579 ctxt->vctxt.nodeMax = 0;
11580 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011581 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11582 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011583 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11584 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11585 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011586 ctxt->dictNames = ctx->dictNames;
11587 ctxt->attsDefault = ctx->attsDefault;
11588 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011589 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011590
11591 xmlParseContent(ctxt);
11592
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011593 ctx->validate = ctxt->validate;
11594 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011595 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011596 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011597 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011598 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011599 }
11600 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011601 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011602 }
11603
11604 if (!ctxt->wellFormed) {
11605 if (ctxt->errNo == 0)
11606 ret = 1;
11607 else
11608 ret = ctxt->errNo;
11609 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011610 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011611 xmlNodePtr cur;
11612
11613 /*
11614 * Return the newly created nodeset after unlinking it from
11615 * they pseudo parent.
11616 */
11617 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011618 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011619 while (cur != NULL) {
11620 cur->parent = NULL;
11621 cur = cur->next;
11622 }
11623 newDoc->children->children = NULL;
11624 }
11625 ret = 0;
11626 }
11627 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011628 ctxt->dict = NULL;
11629 ctxt->attsDefault = NULL;
11630 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011631 xmlFreeParserCtxt(ctxt);
11632 newDoc->intSubset = NULL;
11633 newDoc->extSubset = NULL;
11634 xmlFreeDoc(newDoc);
11635
11636 return(ret);
11637}
11638
11639/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011640 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011641 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011642 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011643 * @sax: the SAX handler bloc (possibly NULL)
11644 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11645 * @depth: Used for loop detection, use 0
11646 * @URL: the URL for the entity to load
11647 * @ID: the System ID for the entity to load
11648 * @list: the return value for the set of parsed nodes
11649 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011650 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011651 *
11652 * Returns 0 if the entity is well formed, -1 in case of args problem and
11653 * the parser error code otherwise
11654 */
11655
Daniel Veillard7d515752003-09-26 19:12:37 +000011656static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011657xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11658 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011659 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011660 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011661 xmlParserCtxtPtr ctxt;
11662 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011663 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011664 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011665 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011666 xmlChar start[4];
11667 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011668
11669 if (depth > 40) {
11670 return(XML_ERR_ENTITY_LOOP);
11671 }
11672
11673
11674
11675 if (list != NULL)
11676 *list = NULL;
11677 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011678 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011679 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011680 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011681
11682
11683 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011684 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011685 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011686 if (oldctxt != NULL) {
11687 ctxt->_private = oldctxt->_private;
11688 ctxt->loadsubset = oldctxt->loadsubset;
11689 ctxt->validate = oldctxt->validate;
11690 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011691 ctxt->record_info = oldctxt->record_info;
11692 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11693 ctxt->node_seq.length = oldctxt->node_seq.length;
11694 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011695 } else {
11696 /*
11697 * Doing validity checking on chunk without context
11698 * doesn't make sense
11699 */
11700 ctxt->_private = NULL;
11701 ctxt->validate = 0;
11702 ctxt->external = 2;
11703 ctxt->loadsubset = 0;
11704 }
Owen Taylor3473f882001-02-23 17:55:21 +000011705 if (sax != NULL) {
11706 oldsax = ctxt->sax;
11707 ctxt->sax = sax;
11708 if (user_data != NULL)
11709 ctxt->userData = user_data;
11710 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011711 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011712 newDoc = xmlNewDoc(BAD_CAST "1.0");
11713 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011714 ctxt->node_seq.maximum = 0;
11715 ctxt->node_seq.length = 0;
11716 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011717 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011718 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011719 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011720 newDoc->intSubset = doc->intSubset;
11721 newDoc->extSubset = doc->extSubset;
11722 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011723 xmlDictReference(newDoc->dict);
11724
Owen Taylor3473f882001-02-23 17:55:21 +000011725 if (doc->URL != NULL) {
11726 newDoc->URL = xmlStrdup(doc->URL);
11727 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011728 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11729 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011730 if (sax != NULL)
11731 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011732 ctxt->node_seq.maximum = 0;
11733 ctxt->node_seq.length = 0;
11734 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011735 xmlFreeParserCtxt(ctxt);
11736 newDoc->intSubset = NULL;
11737 newDoc->extSubset = NULL;
11738 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011739 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011740 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011741 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011742 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011743 ctxt->myDoc = doc;
11744 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011745
Daniel Veillard87a764e2001-06-20 17:41:10 +000011746 /*
11747 * Get the 4 first bytes and decode the charset
11748 * if enc != XML_CHAR_ENCODING_NONE
11749 * plug some encoding conversion routines.
11750 */
11751 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011752 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11753 start[0] = RAW;
11754 start[1] = NXT(1);
11755 start[2] = NXT(2);
11756 start[3] = NXT(3);
11757 enc = xmlDetectCharEncoding(start, 4);
11758 if (enc != XML_CHAR_ENCODING_NONE) {
11759 xmlSwitchEncoding(ctxt, enc);
11760 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011761 }
11762
Owen Taylor3473f882001-02-23 17:55:21 +000011763 /*
11764 * Parse a possible text declaration first
11765 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011766 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011767 xmlParseTextDecl(ctxt);
11768 }
11769
Owen Taylor3473f882001-02-23 17:55:21 +000011770 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011771 ctxt->depth = depth;
11772
11773 xmlParseContent(ctxt);
11774
Daniel Veillard561b7f82002-03-20 21:55:57 +000011775 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011776 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011777 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011778 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011779 }
11780 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011781 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011782 }
11783
11784 if (!ctxt->wellFormed) {
11785 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011786 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011787 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011788 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011789 } else {
11790 if (list != NULL) {
11791 xmlNodePtr cur;
11792
11793 /*
11794 * Return the newly created nodeset after unlinking it from
11795 * they pseudo parent.
11796 */
11797 cur = newDoc->children->children;
11798 *list = cur;
11799 while (cur != NULL) {
11800 cur->parent = NULL;
11801 cur = cur->next;
11802 }
11803 newDoc->children->children = NULL;
11804 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011805 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011806 }
11807 if (sax != NULL)
11808 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011809 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11810 oldctxt->node_seq.length = ctxt->node_seq.length;
11811 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011812 ctxt->node_seq.maximum = 0;
11813 ctxt->node_seq.length = 0;
11814 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011815 xmlFreeParserCtxt(ctxt);
11816 newDoc->intSubset = NULL;
11817 newDoc->extSubset = NULL;
11818 xmlFreeDoc(newDoc);
11819
11820 return(ret);
11821}
11822
Daniel Veillard81273902003-09-30 00:43:48 +000011823#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011824/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011825 * xmlParseExternalEntity:
11826 * @doc: the document the chunk pertains to
11827 * @sax: the SAX handler bloc (possibly NULL)
11828 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11829 * @depth: Used for loop detection, use 0
11830 * @URL: the URL for the entity to load
11831 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011832 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011833 *
11834 * Parse an external general entity
11835 * An external general parsed entity is well-formed if it matches the
11836 * production labeled extParsedEnt.
11837 *
11838 * [78] extParsedEnt ::= TextDecl? content
11839 *
11840 * Returns 0 if the entity is well formed, -1 in case of args problem and
11841 * the parser error code otherwise
11842 */
11843
11844int
11845xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011846 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011847 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011848 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011849}
11850
11851/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011852 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011853 * @doc: the document the chunk pertains to
11854 * @sax: the SAX handler bloc (possibly NULL)
11855 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11856 * @depth: Used for loop detection, use 0
11857 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011858 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011859 *
11860 * Parse a well-balanced chunk of an XML document
11861 * called by the parser
11862 * The allowed sequence for the Well Balanced Chunk is the one defined by
11863 * the content production in the XML grammar:
11864 *
11865 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11866 *
11867 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11868 * the parser error code otherwise
11869 */
11870
11871int
11872xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011873 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011874 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11875 depth, string, lst, 0 );
11876}
Daniel Veillard81273902003-09-30 00:43:48 +000011877#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011878
11879/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011880 * xmlParseBalancedChunkMemoryInternal:
11881 * @oldctxt: the existing parsing context
11882 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11883 * @user_data: the user data field for the parser context
11884 * @lst: the return value for the set of parsed nodes
11885 *
11886 *
11887 * Parse a well-balanced chunk of an XML document
11888 * called by the parser
11889 * The allowed sequence for the Well Balanced Chunk is the one defined by
11890 * the content production in the XML grammar:
11891 *
11892 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11893 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011894 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11895 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011896 *
11897 * In case recover is set to 1, the nodelist will not be empty even if
11898 * the parsed chunk is not well balanced.
11899 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011900static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011901xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11902 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11903 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011904 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011905 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011906 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011907 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011908 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011909 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011910 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011911
11912 if (oldctxt->depth > 40) {
11913 return(XML_ERR_ENTITY_LOOP);
11914 }
11915
11916
11917 if (lst != NULL)
11918 *lst = NULL;
11919 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011920 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011921
11922 size = xmlStrlen(string);
11923
11924 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011925 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011926 if (user_data != NULL)
11927 ctxt->userData = user_data;
11928 else
11929 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011930 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11931 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011932 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11933 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11934 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011935
11936 oldsax = ctxt->sax;
11937 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011938 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011939 ctxt->replaceEntities = oldctxt->replaceEntities;
11940 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011941
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011942 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011943 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011944 newDoc = xmlNewDoc(BAD_CAST "1.0");
11945 if (newDoc == NULL) {
11946 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011947 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011948 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011949 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011950 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011951 newDoc->dict = ctxt->dict;
11952 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011953 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011954 } else {
11955 ctxt->myDoc = oldctxt->myDoc;
11956 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011957 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011958 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011959 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11960 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011961 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011962 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011963 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011964 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011965 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011966 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011967 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011968 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011969 ctxt->myDoc->children = NULL;
11970 ctxt->myDoc->last = NULL;
11971 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011972 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011973 ctxt->instate = XML_PARSER_CONTENT;
11974 ctxt->depth = oldctxt->depth + 1;
11975
Daniel Veillard328f48c2002-11-15 15:24:34 +000011976 ctxt->validate = 0;
11977 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011978 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11979 /*
11980 * ID/IDREF registration will be done in xmlValidateElement below
11981 */
11982 ctxt->loadsubset |= XML_SKIP_IDS;
11983 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011984 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011985 ctxt->attsDefault = oldctxt->attsDefault;
11986 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011987
Daniel Veillard68e9e742002-11-16 15:35:11 +000011988 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011989 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011990 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011991 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011992 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011993 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011994 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011995 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011996 }
11997
11998 if (!ctxt->wellFormed) {
11999 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012000 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012001 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012002 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012003 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012004 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012005 }
12006
William M. Brack7b9154b2003-09-27 19:23:50 +000012007 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012008 xmlNodePtr cur;
12009
12010 /*
12011 * Return the newly created nodeset after unlinking it from
12012 * they pseudo parent.
12013 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012014 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012015 *lst = cur;
12016 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012017#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012018 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12019 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12020 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012021 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12022 oldctxt->myDoc, cur);
12023 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012024#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012025 cur->parent = NULL;
12026 cur = cur->next;
12027 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012028 ctxt->myDoc->children->children = NULL;
12029 }
12030 if (ctxt->myDoc != NULL) {
12031 xmlFreeNode(ctxt->myDoc->children);
12032 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012033 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012034 }
12035
12036 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012037 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012038 ctxt->attsDefault = NULL;
12039 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012040 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012041 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012042 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012043 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012044
12045 return(ret);
12046}
12047
Daniel Veillard29b17482004-08-16 00:39:03 +000012048/**
12049 * xmlParseInNodeContext:
12050 * @node: the context node
12051 * @data: the input string
12052 * @datalen: the input string length in bytes
12053 * @options: a combination of xmlParserOption
12054 * @lst: the return value for the set of parsed nodes
12055 *
12056 * Parse a well-balanced chunk of an XML document
12057 * within the context (DTD, namespaces, etc ...) of the given node.
12058 *
12059 * The allowed sequence for the data is a Well Balanced Chunk defined by
12060 * the content production in the XML grammar:
12061 *
12062 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12063 *
12064 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12065 * error code otherwise
12066 */
12067xmlParserErrors
12068xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12069 int options, xmlNodePtr *lst) {
12070#ifdef SAX2
12071 xmlParserCtxtPtr ctxt;
12072 xmlDocPtr doc = NULL;
12073 xmlNodePtr fake, cur;
12074 int nsnr = 0;
12075
12076 xmlParserErrors ret = XML_ERR_OK;
12077
12078 /*
12079 * check all input parameters, grab the document
12080 */
12081 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12082 return(XML_ERR_INTERNAL_ERROR);
12083 switch (node->type) {
12084 case XML_ELEMENT_NODE:
12085 case XML_ATTRIBUTE_NODE:
12086 case XML_TEXT_NODE:
12087 case XML_CDATA_SECTION_NODE:
12088 case XML_ENTITY_REF_NODE:
12089 case XML_PI_NODE:
12090 case XML_COMMENT_NODE:
12091 case XML_DOCUMENT_NODE:
12092 case XML_HTML_DOCUMENT_NODE:
12093 break;
12094 default:
12095 return(XML_ERR_INTERNAL_ERROR);
12096
12097 }
12098 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12099 (node->type != XML_DOCUMENT_NODE) &&
12100 (node->type != XML_HTML_DOCUMENT_NODE))
12101 node = node->parent;
12102 if (node == NULL)
12103 return(XML_ERR_INTERNAL_ERROR);
12104 if (node->type == XML_ELEMENT_NODE)
12105 doc = node->doc;
12106 else
12107 doc = (xmlDocPtr) node;
12108 if (doc == NULL)
12109 return(XML_ERR_INTERNAL_ERROR);
12110
12111 /*
12112 * allocate a context and set-up everything not related to the
12113 * node position in the tree
12114 */
12115 if (doc->type == XML_DOCUMENT_NODE)
12116 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12117#ifdef LIBXML_HTML_ENABLED
12118 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12119 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12120#endif
12121 else
12122 return(XML_ERR_INTERNAL_ERROR);
12123
12124 if (ctxt == NULL)
12125 return(XML_ERR_NO_MEMORY);
12126 fake = xmlNewComment(NULL);
12127 if (fake == NULL) {
12128 xmlFreeParserCtxt(ctxt);
12129 return(XML_ERR_NO_MEMORY);
12130 }
12131 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012132
12133 /*
12134 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12135 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12136 * we must wait until the last moment to free the original one.
12137 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012138 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012139 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012140 xmlDictFree(ctxt->dict);
12141 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012142 } else
12143 options |= XML_PARSE_NODICT;
12144
12145 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012146 xmlDetectSAX2(ctxt);
12147 ctxt->myDoc = doc;
12148
12149 if (node->type == XML_ELEMENT_NODE) {
12150 nodePush(ctxt, node);
12151 /*
12152 * initialize the SAX2 namespaces stack
12153 */
12154 cur = node;
12155 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12156 xmlNsPtr ns = cur->nsDef;
12157 const xmlChar *iprefix, *ihref;
12158
12159 while (ns != NULL) {
12160 if (ctxt->dict) {
12161 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12162 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12163 } else {
12164 iprefix = ns->prefix;
12165 ihref = ns->href;
12166 }
12167
12168 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12169 nsPush(ctxt, iprefix, ihref);
12170 nsnr++;
12171 }
12172 ns = ns->next;
12173 }
12174 cur = cur->parent;
12175 }
12176 ctxt->instate = XML_PARSER_CONTENT;
12177 }
12178
12179 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12180 /*
12181 * ID/IDREF registration will be done in xmlValidateElement below
12182 */
12183 ctxt->loadsubset |= XML_SKIP_IDS;
12184 }
12185
Daniel Veillard499cc922006-01-18 17:22:35 +000012186#ifdef LIBXML_HTML_ENABLED
12187 if (doc->type == XML_HTML_DOCUMENT_NODE)
12188 __htmlParseContent(ctxt);
12189 else
12190#endif
12191 xmlParseContent(ctxt);
12192
Daniel Veillard29b17482004-08-16 00:39:03 +000012193 nsPop(ctxt, nsnr);
12194 if ((RAW == '<') && (NXT(1) == '/')) {
12195 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12196 } else if (RAW != 0) {
12197 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12198 }
12199 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12200 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12201 ctxt->wellFormed = 0;
12202 }
12203
12204 if (!ctxt->wellFormed) {
12205 if (ctxt->errNo == 0)
12206 ret = XML_ERR_INTERNAL_ERROR;
12207 else
12208 ret = (xmlParserErrors)ctxt->errNo;
12209 } else {
12210 ret = XML_ERR_OK;
12211 }
12212
12213 /*
12214 * Return the newly created nodeset after unlinking it from
12215 * the pseudo sibling.
12216 */
12217
12218 cur = fake->next;
12219 fake->next = NULL;
12220 node->last = fake;
12221
12222 if (cur != NULL) {
12223 cur->prev = NULL;
12224 }
12225
12226 *lst = cur;
12227
12228 while (cur != NULL) {
12229 cur->parent = NULL;
12230 cur = cur->next;
12231 }
12232
12233 xmlUnlinkNode(fake);
12234 xmlFreeNode(fake);
12235
12236
12237 if (ret != XML_ERR_OK) {
12238 xmlFreeNodeList(*lst);
12239 *lst = NULL;
12240 }
William M. Brackc3f81342004-10-03 01:22:44 +000012241
William M. Brackb7b54de2004-10-06 16:38:01 +000012242 if (doc->dict != NULL)
12243 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012244 xmlFreeParserCtxt(ctxt);
12245
12246 return(ret);
12247#else /* !SAX2 */
12248 return(XML_ERR_INTERNAL_ERROR);
12249#endif
12250}
12251
Daniel Veillard81273902003-09-30 00:43:48 +000012252#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012253/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012254 * xmlParseBalancedChunkMemoryRecover:
12255 * @doc: the document the chunk pertains to
12256 * @sax: the SAX handler bloc (possibly NULL)
12257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12258 * @depth: Used for loop detection, use 0
12259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12260 * @lst: the return value for the set of parsed nodes
12261 * @recover: return nodes even if the data is broken (use 0)
12262 *
12263 *
12264 * Parse a well-balanced chunk of an XML document
12265 * called by the parser
12266 * The allowed sequence for the Well Balanced Chunk is the one defined by
12267 * the content production in the XML grammar:
12268 *
12269 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12270 *
12271 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12272 * the parser error code otherwise
12273 *
12274 * In case recover is set to 1, the nodelist will not be empty even if
12275 * the parsed chunk is not well balanced.
12276 */
12277int
12278xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12279 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12280 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012281 xmlParserCtxtPtr ctxt;
12282 xmlDocPtr newDoc;
12283 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012284 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012285 int size;
12286 int ret = 0;
12287
12288 if (depth > 40) {
12289 return(XML_ERR_ENTITY_LOOP);
12290 }
12291
12292
Daniel Veillardcda96922001-08-21 10:56:31 +000012293 if (lst != NULL)
12294 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012295 if (string == NULL)
12296 return(-1);
12297
12298 size = xmlStrlen(string);
12299
12300 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12301 if (ctxt == NULL) return(-1);
12302 ctxt->userData = ctxt;
12303 if (sax != NULL) {
12304 oldsax = ctxt->sax;
12305 ctxt->sax = sax;
12306 if (user_data != NULL)
12307 ctxt->userData = user_data;
12308 }
12309 newDoc = xmlNewDoc(BAD_CAST "1.0");
12310 if (newDoc == NULL) {
12311 xmlFreeParserCtxt(ctxt);
12312 return(-1);
12313 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012314 if ((doc != NULL) && (doc->dict != NULL)) {
12315 xmlDictFree(ctxt->dict);
12316 ctxt->dict = doc->dict;
12317 xmlDictReference(ctxt->dict);
12318 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12319 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12320 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12321 ctxt->dictNames = 1;
12322 } else {
12323 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12324 }
Owen Taylor3473f882001-02-23 17:55:21 +000012325 if (doc != NULL) {
12326 newDoc->intSubset = doc->intSubset;
12327 newDoc->extSubset = doc->extSubset;
12328 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012329 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12330 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012331 if (sax != NULL)
12332 ctxt->sax = oldsax;
12333 xmlFreeParserCtxt(ctxt);
12334 newDoc->intSubset = NULL;
12335 newDoc->extSubset = NULL;
12336 xmlFreeDoc(newDoc);
12337 return(-1);
12338 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012339 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12340 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012341 if (doc == NULL) {
12342 ctxt->myDoc = newDoc;
12343 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012344 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012345 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012346 /* Ensure that doc has XML spec namespace */
12347 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12348 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012349 }
12350 ctxt->instate = XML_PARSER_CONTENT;
12351 ctxt->depth = depth;
12352
12353 /*
12354 * Doing validity checking on chunk doesn't make sense
12355 */
12356 ctxt->validate = 0;
12357 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012358 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012359
Daniel Veillardb39bc392002-10-26 19:29:51 +000012360 if ( doc != NULL ){
12361 content = doc->children;
12362 doc->children = NULL;
12363 xmlParseContent(ctxt);
12364 doc->children = content;
12365 }
12366 else {
12367 xmlParseContent(ctxt);
12368 }
Owen Taylor3473f882001-02-23 17:55:21 +000012369 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012370 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012371 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012372 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012373 }
12374 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012375 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012376 }
12377
12378 if (!ctxt->wellFormed) {
12379 if (ctxt->errNo == 0)
12380 ret = 1;
12381 else
12382 ret = ctxt->errNo;
12383 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012384 ret = 0;
12385 }
12386
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012387 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12388 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012389
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012390 /*
12391 * Return the newly created nodeset after unlinking it from
12392 * they pseudo parent.
12393 */
12394 cur = newDoc->children->children;
12395 *lst = cur;
12396 while (cur != NULL) {
12397 xmlSetTreeDoc(cur, doc);
12398 cur->parent = NULL;
12399 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012400 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012401 newDoc->children->children = NULL;
12402 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012403
Owen Taylor3473f882001-02-23 17:55:21 +000012404 if (sax != NULL)
12405 ctxt->sax = oldsax;
12406 xmlFreeParserCtxt(ctxt);
12407 newDoc->intSubset = NULL;
12408 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012409 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012410 xmlFreeDoc(newDoc);
12411
12412 return(ret);
12413}
12414
12415/**
12416 * xmlSAXParseEntity:
12417 * @sax: the SAX handler block
12418 * @filename: the filename
12419 *
12420 * parse an XML external entity out of context and build a tree.
12421 * It use the given SAX function block to handle the parsing callback.
12422 * If sax is NULL, fallback to the default DOM tree building routines.
12423 *
12424 * [78] extParsedEnt ::= TextDecl? content
12425 *
12426 * This correspond to a "Well Balanced" chunk
12427 *
12428 * Returns the resulting document tree
12429 */
12430
12431xmlDocPtr
12432xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12433 xmlDocPtr ret;
12434 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012435
12436 ctxt = xmlCreateFileParserCtxt(filename);
12437 if (ctxt == NULL) {
12438 return(NULL);
12439 }
12440 if (sax != NULL) {
12441 if (ctxt->sax != NULL)
12442 xmlFree(ctxt->sax);
12443 ctxt->sax = sax;
12444 ctxt->userData = NULL;
12445 }
12446
Owen Taylor3473f882001-02-23 17:55:21 +000012447 xmlParseExtParsedEnt(ctxt);
12448
12449 if (ctxt->wellFormed)
12450 ret = ctxt->myDoc;
12451 else {
12452 ret = NULL;
12453 xmlFreeDoc(ctxt->myDoc);
12454 ctxt->myDoc = NULL;
12455 }
12456 if (sax != NULL)
12457 ctxt->sax = NULL;
12458 xmlFreeParserCtxt(ctxt);
12459
12460 return(ret);
12461}
12462
12463/**
12464 * xmlParseEntity:
12465 * @filename: the filename
12466 *
12467 * parse an XML external entity out of context and build a tree.
12468 *
12469 * [78] extParsedEnt ::= TextDecl? content
12470 *
12471 * This correspond to a "Well Balanced" chunk
12472 *
12473 * Returns the resulting document tree
12474 */
12475
12476xmlDocPtr
12477xmlParseEntity(const char *filename) {
12478 return(xmlSAXParseEntity(NULL, filename));
12479}
Daniel Veillard81273902003-09-30 00:43:48 +000012480#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012481
12482/**
12483 * xmlCreateEntityParserCtxt:
12484 * @URL: the entity URL
12485 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012486 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012487 *
12488 * Create a parser context for an external entity
12489 * Automatic support for ZLIB/Compress compressed document is provided
12490 * by default if found at compile-time.
12491 *
12492 * Returns the new parser context or NULL
12493 */
12494xmlParserCtxtPtr
12495xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12496 const xmlChar *base) {
12497 xmlParserCtxtPtr ctxt;
12498 xmlParserInputPtr inputStream;
12499 char *directory = NULL;
12500 xmlChar *uri;
12501
12502 ctxt = xmlNewParserCtxt();
12503 if (ctxt == NULL) {
12504 return(NULL);
12505 }
12506
12507 uri = xmlBuildURI(URL, base);
12508
12509 if (uri == NULL) {
12510 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12511 if (inputStream == NULL) {
12512 xmlFreeParserCtxt(ctxt);
12513 return(NULL);
12514 }
12515
12516 inputPush(ctxt, inputStream);
12517
12518 if ((ctxt->directory == NULL) && (directory == NULL))
12519 directory = xmlParserGetDirectory((char *)URL);
12520 if ((ctxt->directory == NULL) && (directory != NULL))
12521 ctxt->directory = directory;
12522 } else {
12523 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12524 if (inputStream == NULL) {
12525 xmlFree(uri);
12526 xmlFreeParserCtxt(ctxt);
12527 return(NULL);
12528 }
12529
12530 inputPush(ctxt, inputStream);
12531
12532 if ((ctxt->directory == NULL) && (directory == NULL))
12533 directory = xmlParserGetDirectory((char *)uri);
12534 if ((ctxt->directory == NULL) && (directory != NULL))
12535 ctxt->directory = directory;
12536 xmlFree(uri);
12537 }
Owen Taylor3473f882001-02-23 17:55:21 +000012538 return(ctxt);
12539}
12540
12541/************************************************************************
12542 * *
12543 * Front ends when parsing from a file *
12544 * *
12545 ************************************************************************/
12546
12547/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012548 * xmlCreateURLParserCtxt:
12549 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012550 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012551 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012552 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012553 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012554 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012555 *
12556 * Returns the new parser context or NULL
12557 */
12558xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012559xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012560{
12561 xmlParserCtxtPtr ctxt;
12562 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012563 char *directory = NULL;
12564
Owen Taylor3473f882001-02-23 17:55:21 +000012565 ctxt = xmlNewParserCtxt();
12566 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012567 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012568 return(NULL);
12569 }
12570
Daniel Veillarddf292f72005-01-16 19:00:15 +000012571 if (options)
12572 xmlCtxtUseOptions(ctxt, options);
12573 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012574
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012575 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012576 if (inputStream == NULL) {
12577 xmlFreeParserCtxt(ctxt);
12578 return(NULL);
12579 }
12580
Owen Taylor3473f882001-02-23 17:55:21 +000012581 inputPush(ctxt, inputStream);
12582 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012583 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012584 if ((ctxt->directory == NULL) && (directory != NULL))
12585 ctxt->directory = directory;
12586
12587 return(ctxt);
12588}
12589
Daniel Veillard61b93382003-11-03 14:28:31 +000012590/**
12591 * xmlCreateFileParserCtxt:
12592 * @filename: the filename
12593 *
12594 * Create a parser context for a file content.
12595 * Automatic support for ZLIB/Compress compressed document is provided
12596 * by default if found at compile-time.
12597 *
12598 * Returns the new parser context or NULL
12599 */
12600xmlParserCtxtPtr
12601xmlCreateFileParserCtxt(const char *filename)
12602{
12603 return(xmlCreateURLParserCtxt(filename, 0));
12604}
12605
Daniel Veillard81273902003-09-30 00:43:48 +000012606#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012607/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012608 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012609 * @sax: the SAX handler block
12610 * @filename: the filename
12611 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12612 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012613 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012614 *
12615 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12616 * compressed document is provided by default if found at compile-time.
12617 * It use the given SAX function block to handle the parsing callback.
12618 * If sax is NULL, fallback to the default DOM tree building routines.
12619 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012620 * User data (void *) is stored within the parser context in the
12621 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012622 *
Owen Taylor3473f882001-02-23 17:55:21 +000012623 * Returns the resulting document tree
12624 */
12625
12626xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012627xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12628 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012629 xmlDocPtr ret;
12630 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012631
Daniel Veillard635ef722001-10-29 11:48:19 +000012632 xmlInitParser();
12633
Owen Taylor3473f882001-02-23 17:55:21 +000012634 ctxt = xmlCreateFileParserCtxt(filename);
12635 if (ctxt == NULL) {
12636 return(NULL);
12637 }
12638 if (sax != NULL) {
12639 if (ctxt->sax != NULL)
12640 xmlFree(ctxt->sax);
12641 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012642 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012643 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012644 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012645 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012646 }
Owen Taylor3473f882001-02-23 17:55:21 +000012647
Daniel Veillard37d2d162008-03-14 10:54:00 +000012648 if (ctxt->directory == NULL)
12649 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012650
Daniel Veillarddad3f682002-11-17 16:47:27 +000012651 ctxt->recovery = recovery;
12652
Owen Taylor3473f882001-02-23 17:55:21 +000012653 xmlParseDocument(ctxt);
12654
William M. Brackc07329e2003-09-08 01:57:30 +000012655 if ((ctxt->wellFormed) || recovery) {
12656 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012657 if (ret != NULL) {
12658 if (ctxt->input->buf->compressed > 0)
12659 ret->compression = 9;
12660 else
12661 ret->compression = ctxt->input->buf->compressed;
12662 }
William M. Brackc07329e2003-09-08 01:57:30 +000012663 }
Owen Taylor3473f882001-02-23 17:55:21 +000012664 else {
12665 ret = NULL;
12666 xmlFreeDoc(ctxt->myDoc);
12667 ctxt->myDoc = NULL;
12668 }
12669 if (sax != NULL)
12670 ctxt->sax = NULL;
12671 xmlFreeParserCtxt(ctxt);
12672
12673 return(ret);
12674}
12675
12676/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012677 * xmlSAXParseFile:
12678 * @sax: the SAX handler block
12679 * @filename: the filename
12680 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12681 * documents
12682 *
12683 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12684 * compressed document is provided by default if found at compile-time.
12685 * It use the given SAX function block to handle the parsing callback.
12686 * If sax is NULL, fallback to the default DOM tree building routines.
12687 *
12688 * Returns the resulting document tree
12689 */
12690
12691xmlDocPtr
12692xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12693 int recovery) {
12694 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12695}
12696
12697/**
Owen Taylor3473f882001-02-23 17:55:21 +000012698 * xmlRecoverDoc:
12699 * @cur: a pointer to an array of xmlChar
12700 *
12701 * parse an XML in-memory document and build a tree.
12702 * In the case the document is not Well Formed, a tree is built anyway
12703 *
12704 * Returns the resulting document tree
12705 */
12706
12707xmlDocPtr
12708xmlRecoverDoc(xmlChar *cur) {
12709 return(xmlSAXParseDoc(NULL, cur, 1));
12710}
12711
12712/**
12713 * xmlParseFile:
12714 * @filename: the filename
12715 *
12716 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12717 * compressed document is provided by default if found at compile-time.
12718 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012719 * Returns the resulting document tree if the file was wellformed,
12720 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012721 */
12722
12723xmlDocPtr
12724xmlParseFile(const char *filename) {
12725 return(xmlSAXParseFile(NULL, filename, 0));
12726}
12727
12728/**
12729 * xmlRecoverFile:
12730 * @filename: the filename
12731 *
12732 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12733 * compressed document is provided by default if found at compile-time.
12734 * In the case the document is not Well Formed, a tree is built anyway
12735 *
12736 * Returns the resulting document tree
12737 */
12738
12739xmlDocPtr
12740xmlRecoverFile(const char *filename) {
12741 return(xmlSAXParseFile(NULL, filename, 1));
12742}
12743
12744
12745/**
12746 * xmlSetupParserForBuffer:
12747 * @ctxt: an XML parser context
12748 * @buffer: a xmlChar * buffer
12749 * @filename: a file name
12750 *
12751 * Setup the parser context to parse a new buffer; Clears any prior
12752 * contents from the parser context. The buffer parameter must not be
12753 * NULL, but the filename parameter can be
12754 */
12755void
12756xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12757 const char* filename)
12758{
12759 xmlParserInputPtr input;
12760
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012761 if ((ctxt == NULL) || (buffer == NULL))
12762 return;
12763
Owen Taylor3473f882001-02-23 17:55:21 +000012764 input = xmlNewInputStream(ctxt);
12765 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012766 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012767 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012768 return;
12769 }
12770
12771 xmlClearParserCtxt(ctxt);
12772 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012773 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012774 input->base = buffer;
12775 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012776 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012777 inputPush(ctxt, input);
12778}
12779
12780/**
12781 * xmlSAXUserParseFile:
12782 * @sax: a SAX handler
12783 * @user_data: The user data returned on SAX callbacks
12784 * @filename: a file name
12785 *
12786 * parse an XML file and call the given SAX handler routines.
12787 * Automatic support for ZLIB/Compress compressed document is provided
12788 *
12789 * Returns 0 in case of success or a error number otherwise
12790 */
12791int
12792xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12793 const char *filename) {
12794 int ret = 0;
12795 xmlParserCtxtPtr ctxt;
12796
12797 ctxt = xmlCreateFileParserCtxt(filename);
12798 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012799 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012800 xmlFree(ctxt->sax);
12801 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012802 xmlDetectSAX2(ctxt);
12803
Owen Taylor3473f882001-02-23 17:55:21 +000012804 if (user_data != NULL)
12805 ctxt->userData = user_data;
12806
12807 xmlParseDocument(ctxt);
12808
12809 if (ctxt->wellFormed)
12810 ret = 0;
12811 else {
12812 if (ctxt->errNo != 0)
12813 ret = ctxt->errNo;
12814 else
12815 ret = -1;
12816 }
12817 if (sax != NULL)
12818 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012819 if (ctxt->myDoc != NULL) {
12820 xmlFreeDoc(ctxt->myDoc);
12821 ctxt->myDoc = NULL;
12822 }
Owen Taylor3473f882001-02-23 17:55:21 +000012823 xmlFreeParserCtxt(ctxt);
12824
12825 return ret;
12826}
Daniel Veillard81273902003-09-30 00:43:48 +000012827#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012828
12829/************************************************************************
12830 * *
12831 * Front ends when parsing from memory *
12832 * *
12833 ************************************************************************/
12834
12835/**
12836 * xmlCreateMemoryParserCtxt:
12837 * @buffer: a pointer to a char array
12838 * @size: the size of the array
12839 *
12840 * Create a parser context for an XML in-memory document.
12841 *
12842 * Returns the new parser context or NULL
12843 */
12844xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012845xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012846 xmlParserCtxtPtr ctxt;
12847 xmlParserInputPtr input;
12848 xmlParserInputBufferPtr buf;
12849
12850 if (buffer == NULL)
12851 return(NULL);
12852 if (size <= 0)
12853 return(NULL);
12854
12855 ctxt = xmlNewParserCtxt();
12856 if (ctxt == NULL)
12857 return(NULL);
12858
Daniel Veillard53350552003-09-18 13:35:51 +000012859 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012860 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012861 if (buf == NULL) {
12862 xmlFreeParserCtxt(ctxt);
12863 return(NULL);
12864 }
Owen Taylor3473f882001-02-23 17:55:21 +000012865
12866 input = xmlNewInputStream(ctxt);
12867 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012868 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012869 xmlFreeParserCtxt(ctxt);
12870 return(NULL);
12871 }
12872
12873 input->filename = NULL;
12874 input->buf = buf;
12875 input->base = input->buf->buffer->content;
12876 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012877 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012878
12879 inputPush(ctxt, input);
12880 return(ctxt);
12881}
12882
Daniel Veillard81273902003-09-30 00:43:48 +000012883#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012884/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012885 * xmlSAXParseMemoryWithData:
12886 * @sax: the SAX handler block
12887 * @buffer: an pointer to a char array
12888 * @size: the size of the array
12889 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12890 * documents
12891 * @data: the userdata
12892 *
12893 * parse an XML in-memory block and use the given SAX function block
12894 * to handle the parsing callback. If sax is NULL, fallback to the default
12895 * DOM tree building routines.
12896 *
12897 * User data (void *) is stored within the parser context in the
12898 * context's _private member, so it is available nearly everywhere in libxml
12899 *
12900 * Returns the resulting document tree
12901 */
12902
12903xmlDocPtr
12904xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12905 int size, int recovery, void *data) {
12906 xmlDocPtr ret;
12907 xmlParserCtxtPtr ctxt;
12908
12909 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12910 if (ctxt == NULL) return(NULL);
12911 if (sax != NULL) {
12912 if (ctxt->sax != NULL)
12913 xmlFree(ctxt->sax);
12914 ctxt->sax = sax;
12915 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012916 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012917 if (data!=NULL) {
12918 ctxt->_private=data;
12919 }
12920
Daniel Veillardadba5f12003-04-04 16:09:01 +000012921 ctxt->recovery = recovery;
12922
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012923 xmlParseDocument(ctxt);
12924
12925 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12926 else {
12927 ret = NULL;
12928 xmlFreeDoc(ctxt->myDoc);
12929 ctxt->myDoc = NULL;
12930 }
12931 if (sax != NULL)
12932 ctxt->sax = NULL;
12933 xmlFreeParserCtxt(ctxt);
12934
12935 return(ret);
12936}
12937
12938/**
Owen Taylor3473f882001-02-23 17:55:21 +000012939 * xmlSAXParseMemory:
12940 * @sax: the SAX handler block
12941 * @buffer: an pointer to a char array
12942 * @size: the size of the array
12943 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12944 * documents
12945 *
12946 * parse an XML in-memory block and use the given SAX function block
12947 * to handle the parsing callback. If sax is NULL, fallback to the default
12948 * DOM tree building routines.
12949 *
12950 * Returns the resulting document tree
12951 */
12952xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012953xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12954 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012955 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012956}
12957
12958/**
12959 * xmlParseMemory:
12960 * @buffer: an pointer to a char array
12961 * @size: the size of the array
12962 *
12963 * parse an XML in-memory block and build a tree.
12964 *
12965 * Returns the resulting document tree
12966 */
12967
Daniel Veillard50822cb2001-07-26 20:05:51 +000012968xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012969 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12970}
12971
12972/**
12973 * xmlRecoverMemory:
12974 * @buffer: an pointer to a char array
12975 * @size: the size of the array
12976 *
12977 * parse an XML in-memory block and build a tree.
12978 * In the case the document is not Well Formed, a tree is built anyway
12979 *
12980 * Returns the resulting document tree
12981 */
12982
Daniel Veillard50822cb2001-07-26 20:05:51 +000012983xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012984 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12985}
12986
12987/**
12988 * xmlSAXUserParseMemory:
12989 * @sax: a SAX handler
12990 * @user_data: The user data returned on SAX callbacks
12991 * @buffer: an in-memory XML document input
12992 * @size: the length of the XML document in bytes
12993 *
12994 * A better SAX parsing routine.
12995 * parse an XML in-memory buffer and call the given SAX handler routines.
12996 *
12997 * Returns 0 in case of success or a error number otherwise
12998 */
12999int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013000 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013001 int ret = 0;
13002 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013003
13004 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13005 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013006 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13007 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013008 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013009 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013010
Daniel Veillard30211a02001-04-26 09:33:18 +000013011 if (user_data != NULL)
13012 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013013
13014 xmlParseDocument(ctxt);
13015
13016 if (ctxt->wellFormed)
13017 ret = 0;
13018 else {
13019 if (ctxt->errNo != 0)
13020 ret = ctxt->errNo;
13021 else
13022 ret = -1;
13023 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013024 if (sax != NULL)
13025 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013026 if (ctxt->myDoc != NULL) {
13027 xmlFreeDoc(ctxt->myDoc);
13028 ctxt->myDoc = NULL;
13029 }
Owen Taylor3473f882001-02-23 17:55:21 +000013030 xmlFreeParserCtxt(ctxt);
13031
13032 return ret;
13033}
Daniel Veillard81273902003-09-30 00:43:48 +000013034#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013035
13036/**
13037 * xmlCreateDocParserCtxt:
13038 * @cur: a pointer to an array of xmlChar
13039 *
13040 * Creates a parser context for an XML in-memory document.
13041 *
13042 * Returns the new parser context or NULL
13043 */
13044xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013045xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013046 int len;
13047
13048 if (cur == NULL)
13049 return(NULL);
13050 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013051 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013052}
13053
Daniel Veillard81273902003-09-30 00:43:48 +000013054#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013055/**
13056 * xmlSAXParseDoc:
13057 * @sax: the SAX handler block
13058 * @cur: a pointer to an array of xmlChar
13059 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13060 * documents
13061 *
13062 * parse an XML in-memory document and build a tree.
13063 * It use the given SAX function block to handle the parsing callback.
13064 * If sax is NULL, fallback to the default DOM tree building routines.
13065 *
13066 * Returns the resulting document tree
13067 */
13068
13069xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013070xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013071 xmlDocPtr ret;
13072 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013073 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013074
Daniel Veillard38936062004-11-04 17:45:11 +000013075 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013076
13077
13078 ctxt = xmlCreateDocParserCtxt(cur);
13079 if (ctxt == NULL) return(NULL);
13080 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013081 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013082 ctxt->sax = sax;
13083 ctxt->userData = NULL;
13084 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013085 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013086
13087 xmlParseDocument(ctxt);
13088 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13089 else {
13090 ret = NULL;
13091 xmlFreeDoc(ctxt->myDoc);
13092 ctxt->myDoc = NULL;
13093 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013094 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013095 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013096 xmlFreeParserCtxt(ctxt);
13097
13098 return(ret);
13099}
13100
13101/**
13102 * xmlParseDoc:
13103 * @cur: a pointer to an array of xmlChar
13104 *
13105 * parse an XML in-memory document and build a tree.
13106 *
13107 * Returns the resulting document tree
13108 */
13109
13110xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013111xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013112 return(xmlSAXParseDoc(NULL, cur, 0));
13113}
Daniel Veillard81273902003-09-30 00:43:48 +000013114#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013115
Daniel Veillard81273902003-09-30 00:43:48 +000013116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013117/************************************************************************
13118 * *
13119 * Specific function to keep track of entities references *
13120 * and used by the XSLT debugger *
13121 * *
13122 ************************************************************************/
13123
13124static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13125
13126/**
13127 * xmlAddEntityReference:
13128 * @ent : A valid entity
13129 * @firstNode : A valid first node for children of entity
13130 * @lastNode : A valid last node of children entity
13131 *
13132 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13133 */
13134static void
13135xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13136 xmlNodePtr lastNode)
13137{
13138 if (xmlEntityRefFunc != NULL) {
13139 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13140 }
13141}
13142
13143
13144/**
13145 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013146 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013147 *
13148 * Set the function to call call back when a xml reference has been made
13149 */
13150void
13151xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13152{
13153 xmlEntityRefFunc = func;
13154}
Daniel Veillard81273902003-09-30 00:43:48 +000013155#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013156
13157/************************************************************************
13158 * *
13159 * Miscellaneous *
13160 * *
13161 ************************************************************************/
13162
13163#ifdef LIBXML_XPATH_ENABLED
13164#include <libxml/xpath.h>
13165#endif
13166
Daniel Veillardffa3c742005-07-21 13:24:09 +000013167extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013168static int xmlParserInitialized = 0;
13169
13170/**
13171 * xmlInitParser:
13172 *
13173 * Initialization function for the XML parser.
13174 * This is not reentrant. Call once before processing in case of
13175 * use in multithreaded programs.
13176 */
13177
13178void
13179xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013180 if (xmlParserInitialized != 0)
13181 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013182
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013183#ifdef LIBXML_THREAD_ENABLED
13184 __xmlGlobalInitMutexLock();
13185 if (xmlParserInitialized == 0) {
13186#endif
13187 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13188 (xmlGenericError == NULL))
13189 initGenericErrorDefaultFunc(NULL);
13190 xmlInitGlobals();
13191 xmlInitThreads();
13192 xmlInitMemory();
13193 xmlInitCharEncodingHandlers();
13194 xmlDefaultSAXHandlerInit();
13195 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013196#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013197 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013198#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013199#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013200 htmlInitAutoClose();
13201 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013202#endif
13203#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013204 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013205#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013206 xmlParserInitialized = 1;
13207#ifdef LIBXML_THREAD_ENABLED
13208 }
13209 __xmlGlobalInitMutexUnlock();
13210#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013211}
13212
13213/**
13214 * xmlCleanupParser:
13215 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013216 * This function name is somewhat misleading. It does not clean up
13217 * parser state, it cleans up memory allocated by the library itself.
13218 * It is a cleanup function for the XML library. It tries to reclaim all
13219 * related global memory allocated for the library processing.
13220 * It doesn't deallocate any document related memory. One should
13221 * call xmlCleanupParser() only when the process has finished using
13222 * the library and all XML/HTML documents built with it.
13223 * See also xmlInitParser() which has the opposite function of preparing
13224 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013225 */
13226
13227void
13228xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013229 if (!xmlParserInitialized)
13230 return;
13231
Owen Taylor3473f882001-02-23 17:55:21 +000013232 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013233#ifdef LIBXML_CATALOG_ENABLED
13234 xmlCatalogCleanup();
13235#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013236 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013237 xmlCleanupInputCallbacks();
13238#ifdef LIBXML_OUTPUT_ENABLED
13239 xmlCleanupOutputCallbacks();
13240#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013241#ifdef LIBXML_SCHEMAS_ENABLED
13242 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013243 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013244#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013245 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013246 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013247 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013248 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013249 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013250}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013251
13252/************************************************************************
13253 * *
13254 * New set (2.6.0) of simpler and more flexible APIs *
13255 * *
13256 ************************************************************************/
13257
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; when it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can safely be followed by a semicolon, even
 * as the body of an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13269
13270/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013271 * xmlCtxtReset:
13272 * @ctxt: an XML parser context
13273 *
13274 * Reset a parser context
13275 */
13276void
13277xmlCtxtReset(xmlParserCtxtPtr ctxt)
13278{
13279 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013280 xmlDictPtr dict;
13281
13282 if (ctxt == NULL)
13283 return;
13284
13285 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013286
13287 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13288 xmlFreeInputStream(input);
13289 }
13290 ctxt->inputNr = 0;
13291 ctxt->input = NULL;
13292
13293 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013294 if (ctxt->spaceTab != NULL) {
13295 ctxt->spaceTab[0] = -1;
13296 ctxt->space = &ctxt->spaceTab[0];
13297 } else {
13298 ctxt->space = NULL;
13299 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013300
13301
13302 ctxt->nodeNr = 0;
13303 ctxt->node = NULL;
13304
13305 ctxt->nameNr = 0;
13306 ctxt->name = NULL;
13307
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013308 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013309 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013310 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013311 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013312 DICT_FREE(ctxt->directory);
13313 ctxt->directory = NULL;
13314 DICT_FREE(ctxt->extSubURI);
13315 ctxt->extSubURI = NULL;
13316 DICT_FREE(ctxt->extSubSystem);
13317 ctxt->extSubSystem = NULL;
13318 if (ctxt->myDoc != NULL)
13319 xmlFreeDoc(ctxt->myDoc);
13320 ctxt->myDoc = NULL;
13321
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013322 ctxt->standalone = -1;
13323 ctxt->hasExternalSubset = 0;
13324 ctxt->hasPErefs = 0;
13325 ctxt->html = 0;
13326 ctxt->external = 0;
13327 ctxt->instate = XML_PARSER_START;
13328 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013329
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013330 ctxt->wellFormed = 1;
13331 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013332 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013333 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013334#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013335 ctxt->vctxt.userData = ctxt;
13336 ctxt->vctxt.error = xmlParserValidityError;
13337 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013338#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013339 ctxt->record_info = 0;
13340 ctxt->nbChars = 0;
13341 ctxt->checkIndex = 0;
13342 ctxt->inSubset = 0;
13343 ctxt->errNo = XML_ERR_OK;
13344 ctxt->depth = 0;
13345 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13346 ctxt->catalogs = NULL;
13347 xmlInitNodeInfoSeq(&ctxt->node_seq);
13348
13349 if (ctxt->attsDefault != NULL) {
13350 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13351 ctxt->attsDefault = NULL;
13352 }
13353 if (ctxt->attsSpecial != NULL) {
13354 xmlHashFree(ctxt->attsSpecial, NULL);
13355 ctxt->attsSpecial = NULL;
13356 }
13357
Daniel Veillard4432df22003-09-28 18:58:27 +000013358#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013359 if (ctxt->catalogs != NULL)
13360 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013361#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013362 if (ctxt->lastError.code != XML_ERR_OK)
13363 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013364}
13365
13366/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013367 * xmlCtxtResetPush:
13368 * @ctxt: an XML parser context
13369 * @chunk: a pointer to an array of chars
13370 * @size: number of chars in the array
13371 * @filename: an optional file name or URI
13372 * @encoding: the document encoding, or NULL
13373 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013374 * Reset a push parser context
13375 *
13376 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013377 */
13378int
13379xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13380 int size, const char *filename, const char *encoding)
13381{
13382 xmlParserInputPtr inputStream;
13383 xmlParserInputBufferPtr buf;
13384 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13385
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013386 if (ctxt == NULL)
13387 return(1);
13388
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013389 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13390 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13391
13392 buf = xmlAllocParserInputBuffer(enc);
13393 if (buf == NULL)
13394 return(1);
13395
13396 if (ctxt == NULL) {
13397 xmlFreeParserInputBuffer(buf);
13398 return(1);
13399 }
13400
13401 xmlCtxtReset(ctxt);
13402
13403 if (ctxt->pushTab == NULL) {
13404 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13405 sizeof(xmlChar *));
13406 if (ctxt->pushTab == NULL) {
13407 xmlErrMemory(ctxt, NULL);
13408 xmlFreeParserInputBuffer(buf);
13409 return(1);
13410 }
13411 }
13412
13413 if (filename == NULL) {
13414 ctxt->directory = NULL;
13415 } else {
13416 ctxt->directory = xmlParserGetDirectory(filename);
13417 }
13418
13419 inputStream = xmlNewInputStream(ctxt);
13420 if (inputStream == NULL) {
13421 xmlFreeParserInputBuffer(buf);
13422 return(1);
13423 }
13424
13425 if (filename == NULL)
13426 inputStream->filename = NULL;
13427 else
13428 inputStream->filename = (char *)
13429 xmlCanonicPath((const xmlChar *) filename);
13430 inputStream->buf = buf;
13431 inputStream->base = inputStream->buf->buffer->content;
13432 inputStream->cur = inputStream->buf->buffer->content;
13433 inputStream->end =
13434 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13435
13436 inputPush(ctxt, inputStream);
13437
13438 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13439 (ctxt->input->buf != NULL)) {
13440 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13441 int cur = ctxt->input->cur - ctxt->input->base;
13442
13443 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13444
13445 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13446 ctxt->input->cur = ctxt->input->base + cur;
13447 ctxt->input->end =
13448 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13449 use];
13450#ifdef DEBUG_PUSH
13451 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13452#endif
13453 }
13454
13455 if (encoding != NULL) {
13456 xmlCharEncodingHandlerPtr hdlr;
13457
13458 hdlr = xmlFindCharEncodingHandler(encoding);
13459 if (hdlr != NULL) {
13460 xmlSwitchToEncoding(ctxt, hdlr);
13461 } else {
13462 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13463 "Unsupported encoding %s\n", BAD_CAST encoding);
13464 }
13465 } else if (enc != XML_CHAR_ENCODING_NONE) {
13466 xmlSwitchEncoding(ctxt, enc);
13467 }
13468
13469 return(0);
13470}
13471
13472/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013473 * xmlCtxtUseOptions:
13474 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013475 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013476 *
13477 * Applies the options to the parser context
13478 *
13479 * Returns 0 in case of success, the set of unknown or unimplemented options
13480 * in case of error.
13481 */
13482int
13483xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13484{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013485 if (ctxt == NULL)
13486 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013487 if (options & XML_PARSE_RECOVER) {
13488 ctxt->recovery = 1;
13489 options -= XML_PARSE_RECOVER;
13490 } else
13491 ctxt->recovery = 0;
13492 if (options & XML_PARSE_DTDLOAD) {
13493 ctxt->loadsubset = XML_DETECT_IDS;
13494 options -= XML_PARSE_DTDLOAD;
13495 } else
13496 ctxt->loadsubset = 0;
13497 if (options & XML_PARSE_DTDATTR) {
13498 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13499 options -= XML_PARSE_DTDATTR;
13500 }
13501 if (options & XML_PARSE_NOENT) {
13502 ctxt->replaceEntities = 1;
13503 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13504 options -= XML_PARSE_NOENT;
13505 } else
13506 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013507 if (options & XML_PARSE_PEDANTIC) {
13508 ctxt->pedantic = 1;
13509 options -= XML_PARSE_PEDANTIC;
13510 } else
13511 ctxt->pedantic = 0;
13512 if (options & XML_PARSE_NOBLANKS) {
13513 ctxt->keepBlanks = 0;
13514 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13515 options -= XML_PARSE_NOBLANKS;
13516 } else
13517 ctxt->keepBlanks = 1;
13518 if (options & XML_PARSE_DTDVALID) {
13519 ctxt->validate = 1;
13520 if (options & XML_PARSE_NOWARNING)
13521 ctxt->vctxt.warning = NULL;
13522 if (options & XML_PARSE_NOERROR)
13523 ctxt->vctxt.error = NULL;
13524 options -= XML_PARSE_DTDVALID;
13525 } else
13526 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013527 if (options & XML_PARSE_NOWARNING) {
13528 ctxt->sax->warning = NULL;
13529 options -= XML_PARSE_NOWARNING;
13530 }
13531 if (options & XML_PARSE_NOERROR) {
13532 ctxt->sax->error = NULL;
13533 ctxt->sax->fatalError = NULL;
13534 options -= XML_PARSE_NOERROR;
13535 }
Daniel Veillard81273902003-09-30 00:43:48 +000013536#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013537 if (options & XML_PARSE_SAX1) {
13538 ctxt->sax->startElement = xmlSAX2StartElement;
13539 ctxt->sax->endElement = xmlSAX2EndElement;
13540 ctxt->sax->startElementNs = NULL;
13541 ctxt->sax->endElementNs = NULL;
13542 ctxt->sax->initialized = 1;
13543 options -= XML_PARSE_SAX1;
13544 }
Daniel Veillard81273902003-09-30 00:43:48 +000013545#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013546 if (options & XML_PARSE_NODICT) {
13547 ctxt->dictNames = 0;
13548 options -= XML_PARSE_NODICT;
13549 } else {
13550 ctxt->dictNames = 1;
13551 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013552 if (options & XML_PARSE_NOCDATA) {
13553 ctxt->sax->cdataBlock = NULL;
13554 options -= XML_PARSE_NOCDATA;
13555 }
13556 if (options & XML_PARSE_NSCLEAN) {
13557 ctxt->options |= XML_PARSE_NSCLEAN;
13558 options -= XML_PARSE_NSCLEAN;
13559 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013560 if (options & XML_PARSE_NONET) {
13561 ctxt->options |= XML_PARSE_NONET;
13562 options -= XML_PARSE_NONET;
13563 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013564 if (options & XML_PARSE_COMPACT) {
13565 ctxt->options |= XML_PARSE_COMPACT;
13566 options -= XML_PARSE_COMPACT;
13567 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013568 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013569 return (options);
13570}
13571
13572/**
13573 * xmlDoRead:
13574 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013575 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013576 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013577 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013578 * @reuse: keep the context for reuse
13579 *
13580 * Common front-end for the xmlRead functions
13581 *
13582 * Returns the resulting document tree or NULL
13583 */
13584static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013585xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13586 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013587{
13588 xmlDocPtr ret;
13589
13590 xmlCtxtUseOptions(ctxt, options);
13591 if (encoding != NULL) {
13592 xmlCharEncodingHandlerPtr hdlr;
13593
13594 hdlr = xmlFindCharEncodingHandler(encoding);
13595 if (hdlr != NULL)
13596 xmlSwitchToEncoding(ctxt, hdlr);
13597 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013598 if ((URL != NULL) && (ctxt->input != NULL) &&
13599 (ctxt->input->filename == NULL))
13600 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013601 xmlParseDocument(ctxt);
13602 if ((ctxt->wellFormed) || ctxt->recovery)
13603 ret = ctxt->myDoc;
13604 else {
13605 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013606 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013607 xmlFreeDoc(ctxt->myDoc);
13608 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013609 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013610 ctxt->myDoc = NULL;
13611 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013612 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013613 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013614
13615 return (ret);
13616}
13617
13618/**
13619 * xmlReadDoc:
13620 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013621 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013622 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013623 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013624 *
13625 * parse an XML in-memory document and build a tree.
13626 *
13627 * Returns the resulting document tree
13628 */
13629xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013630xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013631{
13632 xmlParserCtxtPtr ctxt;
13633
13634 if (cur == NULL)
13635 return (NULL);
13636
13637 ctxt = xmlCreateDocParserCtxt(cur);
13638 if (ctxt == NULL)
13639 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013640 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013641}
13642
13643/**
13644 * xmlReadFile:
13645 * @filename: a file or URL
13646 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013647 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013648 *
13649 * parse an XML file from the filesystem or the network.
13650 *
13651 * Returns the resulting document tree
13652 */
13653xmlDocPtr
13654xmlReadFile(const char *filename, const char *encoding, int options)
13655{
13656 xmlParserCtxtPtr ctxt;
13657
Daniel Veillard61b93382003-11-03 14:28:31 +000013658 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013659 if (ctxt == NULL)
13660 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013661 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013662}
13663
13664/**
13665 * xmlReadMemory:
13666 * @buffer: a pointer to a char array
13667 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013668 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013669 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013670 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013671 *
13672 * parse an XML in-memory document and build a tree.
13673 *
13674 * Returns the resulting document tree
13675 */
13676xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013677xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013678{
13679 xmlParserCtxtPtr ctxt;
13680
13681 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13682 if (ctxt == NULL)
13683 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013684 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013685}
13686
13687/**
13688 * xmlReadFd:
13689 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013690 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013691 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013692 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013693 *
13694 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013695 * NOTE that the file descriptor will not be closed when the
13696 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013697 *
13698 * Returns the resulting document tree
13699 */
13700xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013701xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013702{
13703 xmlParserCtxtPtr ctxt;
13704 xmlParserInputBufferPtr input;
13705 xmlParserInputPtr stream;
13706
13707 if (fd < 0)
13708 return (NULL);
13709
13710 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13711 if (input == NULL)
13712 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013713 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013714 ctxt = xmlNewParserCtxt();
13715 if (ctxt == NULL) {
13716 xmlFreeParserInputBuffer(input);
13717 return (NULL);
13718 }
13719 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13720 if (stream == NULL) {
13721 xmlFreeParserInputBuffer(input);
13722 xmlFreeParserCtxt(ctxt);
13723 return (NULL);
13724 }
13725 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013726 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013727}
13728
13729/**
13730 * xmlReadIO:
13731 * @ioread: an I/O read function
13732 * @ioclose: an I/O close function
13733 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013734 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013735 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013736 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013737 *
13738 * parse an XML document from I/O functions and source and build a tree.
13739 *
13740 * Returns the resulting document tree
13741 */
13742xmlDocPtr
13743xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013744 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013745{
13746 xmlParserCtxtPtr ctxt;
13747 xmlParserInputBufferPtr input;
13748 xmlParserInputPtr stream;
13749
13750 if (ioread == NULL)
13751 return (NULL);
13752
13753 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13754 XML_CHAR_ENCODING_NONE);
13755 if (input == NULL)
13756 return (NULL);
13757 ctxt = xmlNewParserCtxt();
13758 if (ctxt == NULL) {
13759 xmlFreeParserInputBuffer(input);
13760 return (NULL);
13761 }
13762 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13763 if (stream == NULL) {
13764 xmlFreeParserInputBuffer(input);
13765 xmlFreeParserCtxt(ctxt);
13766 return (NULL);
13767 }
13768 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013769 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013770}
13771
13772/**
13773 * xmlCtxtReadDoc:
13774 * @ctxt: an XML parser context
13775 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013776 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013777 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013778 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013779 *
13780 * parse an XML in-memory document and build a tree.
13781 * This reuses the existing @ctxt parser context
13782 *
13783 * Returns the resulting document tree
13784 */
13785xmlDocPtr
13786xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013787 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013788{
13789 xmlParserInputPtr stream;
13790
13791 if (cur == NULL)
13792 return (NULL);
13793 if (ctxt == NULL)
13794 return (NULL);
13795
13796 xmlCtxtReset(ctxt);
13797
13798 stream = xmlNewStringInputStream(ctxt, cur);
13799 if (stream == NULL) {
13800 return (NULL);
13801 }
13802 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013803 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013804}
13805
13806/**
13807 * xmlCtxtReadFile:
13808 * @ctxt: an XML parser context
13809 * @filename: a file or URL
13810 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013811 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013812 *
13813 * parse an XML file from the filesystem or the network.
13814 * This reuses the existing @ctxt parser context
13815 *
13816 * Returns the resulting document tree
13817 */
13818xmlDocPtr
13819xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13820 const char *encoding, int options)
13821{
13822 xmlParserInputPtr stream;
13823
13824 if (filename == NULL)
13825 return (NULL);
13826 if (ctxt == NULL)
13827 return (NULL);
13828
13829 xmlCtxtReset(ctxt);
13830
Daniel Veillard29614c72004-11-26 10:47:26 +000013831 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013832 if (stream == NULL) {
13833 return (NULL);
13834 }
13835 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013836 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013837}
13838
13839/**
13840 * xmlCtxtReadMemory:
13841 * @ctxt: an XML parser context
13842 * @buffer: a pointer to a char array
13843 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013844 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013845 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013846 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013847 *
13848 * parse an XML in-memory document and build a tree.
13849 * This reuses the existing @ctxt parser context
13850 *
13851 * Returns the resulting document tree
13852 */
13853xmlDocPtr
13854xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013855 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013856{
13857 xmlParserInputBufferPtr input;
13858 xmlParserInputPtr stream;
13859
13860 if (ctxt == NULL)
13861 return (NULL);
13862 if (buffer == NULL)
13863 return (NULL);
13864
13865 xmlCtxtReset(ctxt);
13866
13867 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13868 if (input == NULL) {
13869 return(NULL);
13870 }
13871
13872 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13873 if (stream == NULL) {
13874 xmlFreeParserInputBuffer(input);
13875 return(NULL);
13876 }
13877
13878 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013879 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013880}
13881
13882/**
13883 * xmlCtxtReadFd:
13884 * @ctxt: an XML parser context
13885 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013886 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013887 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013888 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013889 *
13890 * parse an XML from a file descriptor and build a tree.
13891 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013892 * NOTE that the file descriptor will not be closed when the
13893 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013894 *
13895 * Returns the resulting document tree
13896 */
13897xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013898xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13899 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013900{
13901 xmlParserInputBufferPtr input;
13902 xmlParserInputPtr stream;
13903
13904 if (fd < 0)
13905 return (NULL);
13906 if (ctxt == NULL)
13907 return (NULL);
13908
13909 xmlCtxtReset(ctxt);
13910
13911
13912 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13913 if (input == NULL)
13914 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013915 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013916 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13917 if (stream == NULL) {
13918 xmlFreeParserInputBuffer(input);
13919 return (NULL);
13920 }
13921 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013922 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013923}
13924
13925/**
13926 * xmlCtxtReadIO:
13927 * @ctxt: an XML parser context
13928 * @ioread: an I/O read function
13929 * @ioclose: an I/O close function
13930 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013931 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013932 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013933 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013934 *
13935 * parse an XML document from I/O functions and source and build a tree.
13936 * This reuses the existing @ctxt parser context
13937 *
13938 * Returns the resulting document tree
13939 */
13940xmlDocPtr
13941xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13942 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013943 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013944 const char *encoding, int options)
13945{
13946 xmlParserInputBufferPtr input;
13947 xmlParserInputPtr stream;
13948
13949 if (ioread == NULL)
13950 return (NULL);
13951 if (ctxt == NULL)
13952 return (NULL);
13953
13954 xmlCtxtReset(ctxt);
13955
13956 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13957 XML_CHAR_ENCODING_NONE);
13958 if (input == NULL)
13959 return (NULL);
13960 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13961 if (stream == NULL) {
13962 xmlFreeParserInputBuffer(input);
13963 return (NULL);
13964 }
13965 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013966 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013967}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013968
13969#define bottom_parser
13970#include "elfgcchack.h"