blob: 5307dade86975ec351745aef8a044e204b3b238d [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents that we
 * accept to process. It is a safety boundary, not an inherent
 * limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* NOTE(review): presumably selects the SAX2 code paths - confirm at the use sites. */
#define SAX2 1

/* NOTE(review): presumably sizes of parser working buffers - confirm at the use sites. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string; BAD_CAST is supplied by the libxml headers. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL-terminated list of the XML-prefixed processing instruction
 * targets allowed by the W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
825/************************************************************************
826 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000827 * SAX2 defaulted attributes handling *
828 * *
829 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
Daniel Veillard6a31b832008-03-26 14:06:44 +0000981 /*
982 * Allows to detect attribute redefinitions
983 */
984 if (ctxt->attsSpecial != NULL) {
985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
986 return;
987 }
988
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991 if (ctxt->attsDefault == NULL)
992 goto mem_error;
993 }
994
995 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000996 * split the element name into prefix:localname , the string found
997 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000998 */
999 name = xmlSplitQName3(fullname, &len);
1000 if (name == NULL) {
1001 name = xmlDictLookup(ctxt->dict, fullname, -1);
1002 prefix = NULL;
1003 } else {
1004 name = xmlDictLookup(ctxt->dict, name, -1);
1005 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1006 }
1007
1008 /*
1009 * make sure there is some storage
1010 */
1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012 if (defaults == NULL) {
1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001014 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015 if (defaults == NULL)
1016 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001018 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001019 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1020 defaults, NULL) < 0) {
1021 xmlFree(defaults);
1022 goto mem_error;
1023 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001025 xmlDefAttrsPtr temp;
1026
1027 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001029 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001030 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001031 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001033 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1034 defaults, NULL) < 0) {
1035 xmlFree(defaults);
1036 goto mem_error;
1037 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001038 }
1039
1040 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001041 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001042 * are within the DTD and hen not associated to namespace names.
1043 */
1044 name = xmlSplitQName3(fullattr, &len);
1045 if (name == NULL) {
1046 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1047 prefix = NULL;
1048 } else {
1049 name = xmlDictLookup(ctxt->dict, name, -1);
1050 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1051 }
1052
1053 defaults->values[4 * defaults->nbAttrs] = name;
1054 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1055 /* intern the string and precompute the end */
1056 len = xmlStrlen(value);
1057 value = xmlDictLookup(ctxt->dict, value, len);
1058 defaults->values[4 * defaults->nbAttrs + 2] = value;
1059 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1060 defaults->nbAttrs++;
1061
1062 return;
1063
1064mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001066 return;
1067}
1068
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001069/**
1070 * xmlAddSpecialAttr:
1071 * @ctxt: an XML parser context
1072 * @fullname: the element fullname
1073 * @fullattr: the attribute fullname
1074 * @type: the attribute type
1075 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001076 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001077 */
1078static void
1079xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1080 const xmlChar *fullname,
1081 const xmlChar *fullattr,
1082 int type)
1083{
1084 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001085 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001086 if (ctxt->attsSpecial == NULL)
1087 goto mem_error;
1088 }
1089
Daniel Veillardac4118d2008-01-11 05:27:32 +00001090 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1091 return;
1092
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001093 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1094 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001095 return;
1096
1097mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001098 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001099 return;
1100}
1101
Daniel Veillard4432df22003-09-28 18:58:27 +00001102/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001103 * xmlCleanSpecialAttrCallback:
1104 *
1105 * Removes CDATA attributes from the special attribute table
1106 */
1107static void
1108xmlCleanSpecialAttrCallback(void *payload, void *data,
1109 const xmlChar *fullname, const xmlChar *fullattr,
1110 const xmlChar *unused ATTRIBUTE_UNUSED) {
1111 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1112
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001113 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001114 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1115 }
1116}
1117
1118/**
1119 * xmlCleanSpecialAttr:
1120 * @ctxt: an XML parser context
1121 *
1122 * Trim the list of attributes defined to remove all those of type
1123 * CDATA as they are not special. This call should be done when finishing
1124 * to parse the DTD and before starting to parse the document root.
1125 */
1126static void
1127xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1128{
1129 if (ctxt->attsSpecial == NULL)
1130 return;
1131
1132 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1133
1134 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1135 xmlHashFree(ctxt->attsSpecial, NULL);
1136 ctxt->attsSpecial = NULL;
1137 }
1138 return;
1139}
1140
1141/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001142 * xmlCheckLanguageID:
1143 * @lang: pointer to the string value
1144 *
1145 * Checks that the value conforms to the LanguageID production:
1146 *
1147 * NOTE: this is somewhat deprecated, those productions were removed from
1148 * the XML Second edition.
1149 *
1150 * [33] LanguageID ::= Langcode ('-' Subcode)*
1151 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1152 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1153 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1154 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1155 * [38] Subcode ::= ([a-z] | [A-Z])+
1156 *
1157 * Returns 1 if correct 0 otherwise
1158 **/
1159int
1160xmlCheckLanguageID(const xmlChar * lang)
1161{
1162 const xmlChar *cur = lang;
1163
1164 if (cur == NULL)
1165 return (0);
1166 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1167 ((cur[0] == 'I') && (cur[1] == '-'))) {
1168 /*
1169 * IANA code
1170 */
1171 cur += 2;
1172 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1173 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1174 cur++;
1175 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1176 ((cur[0] == 'X') && (cur[1] == '-'))) {
1177 /*
1178 * User code
1179 */
1180 cur += 2;
1181 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1182 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1183 cur++;
1184 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1185 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1186 /*
1187 * ISO639
1188 */
1189 cur++;
1190 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1191 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1192 cur++;
1193 else
1194 return (0);
1195 } else
1196 return (0);
1197 while (cur[0] != 0) { /* non input consuming */
1198 if (cur[0] != '-')
1199 return (0);
1200 cur++;
1201 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1202 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1203 cur++;
1204 else
1205 return (0);
1206 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1207 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1208 cur++;
1209 }
1210 return (1);
1211}
1212
Owen Taylor3473f882001-02-23 17:55:21 +00001213/************************************************************************
1214 * *
1215 * Parser stacks related functions and macros *
1216 * *
1217 ************************************************************************/
1218
1219xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1220 const xmlChar ** str);
1221
Daniel Veillard0fb18932003-09-07 09:14:37 +00001222#ifdef SAX2
1223/**
1224 * nsPush:
1225 * @ctxt: an XML parser context
1226 * @prefix: the namespace prefix or NULL
1227 * @URL: the namespace name
1228 *
1229 * Pushes a new parser namespace on top of the ns stack
1230 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001231 * Returns -1 in case of error, -2 if the namespace should be discarded
1232 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001233 */
1234static int
1235nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1236{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001237 if (ctxt->options & XML_PARSE_NSCLEAN) {
1238 int i;
1239 for (i = 0;i < ctxt->nsNr;i += 2) {
1240 if (ctxt->nsTab[i] == prefix) {
1241 /* in scope */
1242 if (ctxt->nsTab[i + 1] == URL)
1243 return(-2);
1244 /* out of scope keep it */
1245 break;
1246 }
1247 }
1248 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001249 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1250 ctxt->nsMax = 10;
1251 ctxt->nsNr = 0;
1252 ctxt->nsTab = (const xmlChar **)
1253 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1254 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001256 ctxt->nsMax = 0;
1257 return (-1);
1258 }
1259 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001260 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001261 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001262 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1263 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1264 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001265 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001266 ctxt->nsMax /= 2;
1267 return (-1);
1268 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001269 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001270 }
1271 ctxt->nsTab[ctxt->nsNr++] = prefix;
1272 ctxt->nsTab[ctxt->nsNr++] = URL;
1273 return (ctxt->nsNr);
1274}
1275/**
1276 * nsPop:
1277 * @ctxt: an XML parser context
1278 * @nr: the number to pop
1279 *
1280 * Pops the top @nr parser prefix/namespace from the ns stack
1281 *
1282 * Returns the number of namespaces removed
1283 */
1284static int
1285nsPop(xmlParserCtxtPtr ctxt, int nr)
1286{
1287 int i;
1288
1289 if (ctxt->nsTab == NULL) return(0);
1290 if (ctxt->nsNr < nr) {
1291 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1292 nr = ctxt->nsNr;
1293 }
1294 if (ctxt->nsNr <= 0)
1295 return (0);
1296
1297 for (i = 0;i < nr;i++) {
1298 ctxt->nsNr--;
1299 ctxt->nsTab[ctxt->nsNr] = NULL;
1300 }
1301 return(nr);
1302}
1303#endif
1304
1305static int
1306xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1307 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001309 int maxatts;
1310
1311 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001312 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001313 atts = (const xmlChar **)
1314 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001315 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001316 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001317 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1318 if (attallocs == NULL) goto mem_error;
1319 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001320 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001321 } else if (nr + 5 > ctxt->maxatts) {
1322 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001323 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1324 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001325 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001326 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001327 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1328 (maxatts / 5) * sizeof(int));
1329 if (attallocs == NULL) goto mem_error;
1330 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001331 ctxt->maxatts = maxatts;
1332 }
1333 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001334mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001335 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001336 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001337}
1338
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001339/**
1340 * inputPush:
1341 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001342 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001343 *
1344 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001345 *
1346 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001349inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1350{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001351 if ((ctxt == NULL) || (value == NULL))
1352 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->inputNr >= ctxt->inputMax) {
1354 ctxt->inputMax *= 2;
1355 ctxt->inputTab =
1356 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1357 ctxt->inputMax *
1358 sizeof(ctxt->inputTab[0]));
1359 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001360 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001361 return (0);
1362 }
1363 }
1364 ctxt->inputTab[ctxt->inputNr] = value;
1365 ctxt->input = value;
1366 return (ctxt->inputNr++);
1367}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001368/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001369 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001370 * @ctxt: an XML parser context
1371 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001373 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001374 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001375 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001376xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001377inputPop(xmlParserCtxtPtr ctxt)
1378{
1379 xmlParserInputPtr ret;
1380
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001381 if (ctxt == NULL)
1382 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001384 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 ctxt->inputNr--;
1386 if (ctxt->inputNr > 0)
1387 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1388 else
1389 ctxt->input = NULL;
1390 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001391 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001392 return (ret);
1393}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001394/**
1395 * nodePush:
1396 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001397 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001398 *
1399 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001400 *
1401 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001402 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001403int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001404nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1405{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001406 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001407 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001408 xmlNodePtr *tmp;
1409
1410 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1411 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001412 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001413 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001414 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001415 return (0);
1416 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001417 ctxt->nodeTab = tmp;
1418 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001420 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001421 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001422 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1423 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001424 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001425 return(0);
1426 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001427 ctxt->nodeTab[ctxt->nodeNr] = value;
1428 ctxt->node = value;
1429 return (ctxt->nodeNr++);
1430}
1431/**
1432 * nodePop:
1433 * @ctxt: an XML parser context
1434 *
1435 * Pops the top element node from the node stack
1436 *
1437 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001438 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001439xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001440nodePop(xmlParserCtxtPtr ctxt)
1441{
1442 xmlNodePtr ret;
1443
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001444 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001446 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001447 ctxt->nodeNr--;
1448 if (ctxt->nodeNr > 0)
1449 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1450 else
1451 ctxt->node = NULL;
1452 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001453 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001454 return (ret);
1455}
Daniel Veillarda2351322004-06-27 12:08:10 +00001456
1457#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001458/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001459 * nameNsPush:
1460 * @ctxt: an XML parser context
1461 * @value: the element name
1462 * @prefix: the element prefix
1463 * @URI: the element namespace name
1464 *
1465 * Pushes a new element name/prefix/URL on top of the name stack
1466 *
1467 * Returns -1 in case of error, the index in the stack otherwise
1468 */
1469static int
1470nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1471 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1472{
1473 if (ctxt->nameNr >= ctxt->nameMax) {
1474 const xmlChar * *tmp;
1475 void **tmp2;
1476 ctxt->nameMax *= 2;
1477 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1478 ctxt->nameMax *
1479 sizeof(ctxt->nameTab[0]));
1480 if (tmp == NULL) {
1481 ctxt->nameMax /= 2;
1482 goto mem_error;
1483 }
1484 ctxt->nameTab = tmp;
1485 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1486 ctxt->nameMax * 3 *
1487 sizeof(ctxt->pushTab[0]));
1488 if (tmp2 == NULL) {
1489 ctxt->nameMax /= 2;
1490 goto mem_error;
1491 }
1492 ctxt->pushTab = tmp2;
1493 }
1494 ctxt->nameTab[ctxt->nameNr] = value;
1495 ctxt->name = value;
1496 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1497 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001498 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001499 return (ctxt->nameNr++);
1500mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001501 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001502 return (-1);
1503}
1504/**
1505 * nameNsPop:
1506 * @ctxt: an XML parser context
1507 *
1508 * Pops the top element/prefix/URI name from the name stack
1509 *
1510 * Returns the name just removed
1511 */
1512static const xmlChar *
1513nameNsPop(xmlParserCtxtPtr ctxt)
1514{
1515 const xmlChar *ret;
1516
1517 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001518 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001519 ctxt->nameNr--;
1520 if (ctxt->nameNr > 0)
1521 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1522 else
1523 ctxt->name = NULL;
1524 ret = ctxt->nameTab[ctxt->nameNr];
1525 ctxt->nameTab[ctxt->nameNr] = NULL;
1526 return (ret);
1527}
Daniel Veillarda2351322004-06-27 12:08:10 +00001528#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001529
1530/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001531 * namePush:
1532 * @ctxt: an XML parser context
1533 * @value: the element name
1534 *
1535 * Pushes a new element name on top of the name stack
1536 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001537 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001539int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001540namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001542 if (ctxt == NULL) return (-1);
1543
Daniel Veillard1c732d22002-11-30 11:22:59 +00001544 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001545 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001546 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001547 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 ctxt->nameMax *
1549 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001550 if (tmp == NULL) {
1551 ctxt->nameMax /= 2;
1552 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001554 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001555 }
1556 ctxt->nameTab[ctxt->nameNr] = value;
1557 ctxt->name = value;
1558 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001559mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001560 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001561 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562}
1563/**
1564 * namePop:
1565 * @ctxt: an XML parser context
1566 *
1567 * Pops the top element name from the name stack
1568 *
1569 * Returns the name just removed
1570 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001571const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001572namePop(xmlParserCtxtPtr ctxt)
1573{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001574 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001575
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001576 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nameNr--;
1579 if (ctxt->nameNr > 0)
1580 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1581 else
1582 ctxt->name = NULL;
1583 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Owen Taylor3473f882001-02-23 17:55:21 +00001587
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001588static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001589 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001590 int *tmp;
1591
Owen Taylor3473f882001-02-23 17:55:21 +00001592 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001593 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1594 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1595 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001596 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001597 return(0);
1598 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001599 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 ctxt->spaceTab[ctxt->spaceNr] = val;
1602 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1603 return(ctxt->spaceNr++);
1604}
1605
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001606static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001607 int ret;
1608 if (ctxt->spaceNr <= 0) return(0);
1609 ctxt->spaceNr--;
1610 if (ctxt->spaceNr > 0)
1611 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1612 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001613 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001614 ret = ctxt->spaceTab[ctxt->spaceNr];
1615 ctxt->spaceTab[ctxt->spaceNr] = -1;
1616 return(ret);
1617}
1618
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1653
/*
 * Direct accessors on the current input of the `ctxt' variable that must
 * be in scope at the point of use.
 *   RAW / CUR  the xmlChar at the current position
 *   NXT(val)   the xmlChar `val' positions after the current one
 *   CUR_PTR    the current position pointer itself
 */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
1658
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at `s' are exactly
 * c1..cn. The bytes are compared as unsigned char. Evaluation stops at the
 * first mismatch, so bytes after a mismatch are not read.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. a conditional expression used as `s' or as one of the characters)
 * are compared as a whole instead of being split by operator precedence.
 * Note that `s' is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1676
/*
 * SKIP(val): advance the current input by `val' xmlChars, updating the
 * character and column counters by the same amount (no newline handling).
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and an exhausted input that cannot be
 * grown is popped. `val' is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (ctxt->input->cur[0] == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((ctxt->input->cur[0] == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1684
/*
 * SKIPL(val): advance the current input by `val' xmlChars one at a time,
 * keeping the line and column counters right when newlines are crossed.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and an exhausted input that cannot be
 * grown is popped.
 *
 * `val' is parenthesized in the loop condition so that a compound
 * expression is compared as a whole; it is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1699
Daniel Veillarda880b122003-04-21 21:36:41 +00001700#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001701 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1702 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001703 xmlSHRINK (ctxt);
1704
1705static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1706 xmlParserInputShrink(ctxt->input);
1707 if ((*ctxt->input->cur == 0) &&
1708 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1709 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001710 }
Owen Taylor3473f882001-02-23 17:55:21 +00001711
Daniel Veillarda880b122003-04-21 21:36:41 +00001712#define GROW if ((ctxt->progressive == 0) && \
1713 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001714 xmlGROW (ctxt);
1715
1716static void xmlGROW (xmlParserCtxtPtr ctxt) {
1717 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1718 if ((*ctxt->input->cur == 0) &&
1719 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1720 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001721}
Owen Taylor3473f882001-02-23 17:55:21 +00001722
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and character
 * counters, and try to grow the input when its end is reached. There is
 * no newline handling. Expands to a brace-enclosed block.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (ctxt->input->cur[0] == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1734
/*
 * NEXTL(l): step over the current character, which is `l' xmlChars long.
 * A newline starts a new line (column reset to 1); anything else advances
 * the column by one. A '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (ctxt->input->cur[0] != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += l;						\
    if (ctxt->input->cur[0] == '%')					\
	xmlParserHandlePEReference(ctxt);				\
  } while (0)
1742
/* CUR_CHAR(l): current character of the parser input; its length goes to l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same, but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. A character of length 1 is stored directly as one xmlChar,
 * any other length goes through xmlCopyCharMultiByte(). Expands to a bare
 * if/else statement without the trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00001749
1750/**
1751 * xmlSkipBlankChars:
1752 * @ctxt: the XML parser context
1753 *
1754 * skip all blanks character found at that point in the input streams.
1755 * It pops up finished entities in the process if allowable at that point.
1756 *
1757 * Returns the number of space chars skipped
1758 */
1759
1760int
1761xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001762 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001763
1764 /*
1765 * It's Okay to use CUR/NEXT here since all the blanks are on
1766 * the ASCII range.
1767 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001768 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1769 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001770 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001771 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001772 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001773 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001774 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001775 if (*cur == '\n') {
1776 ctxt->input->line++; ctxt->input->col = 1;
1777 }
1778 cur++;
1779 res++;
1780 if (*cur == 0) {
1781 ctxt->input->cur = cur;
1782 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1783 cur = ctxt->input->cur;
1784 }
1785 }
1786 ctxt->input->cur = cur;
1787 } else {
1788 int cur;
1789 do {
1790 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001791 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001792 NEXT;
1793 cur = CUR;
1794 res++;
1795 }
1796 while ((cur == 0) && (ctxt->inputNr > 1) &&
1797 (ctxt->instate != XML_PARSER_COMMENT)) {
1798 xmlPopInput(ctxt);
1799 cur = CUR;
1800 }
1801 /*
1802 * Need to handle support of entities branching here
1803 */
1804 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1805 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1806 }
Owen Taylor3473f882001-02-23 17:55:21 +00001807 return(res);
1808}
1809
1810/************************************************************************
1811 * *
1812 * Commodity functions to handle entities *
1813 * *
1814 ************************************************************************/
1815
1816/**
1817 * xmlPopInput:
1818 * @ctxt: an XML parser context
1819 *
1820 * xmlPopInput: the current input pointed by ctxt->input came to an end
1821 * pop it and return the next char.
1822 *
1823 * Returns the current xmlChar in the parser context
1824 */
1825xmlChar
1826xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001827 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (xmlParserDebugEntities)
1829 xmlGenericError(xmlGenericErrorContext,
1830 "Popping input %d\n", ctxt->inputNr);
1831 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001832 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001833 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1834 return(xmlPopInput(ctxt));
1835 return(CUR);
1836}
1837
1838/**
1839 * xmlPushInput:
1840 * @ctxt: an XML parser context
1841 * @input: an XML parser input fragment (entity, XML fragment ...).
1842 *
1843 * xmlPushInput: switch to a new input stream which is stacked on top
1844 * of the previous one(s).
1845 */
1846void
1847xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1848 if (input == NULL) return;
1849
1850 if (xmlParserDebugEntities) {
1851 if ((ctxt->input != NULL) && (ctxt->input->filename))
1852 xmlGenericError(xmlGenericErrorContext,
1853 "%s(%d): ", ctxt->input->filename,
1854 ctxt->input->line);
1855 xmlGenericError(xmlGenericErrorContext,
1856 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1857 }
1858 inputPush(ctxt, input);
1859 GROW;
1860}
1861
1862/**
1863 * xmlParseCharRef:
1864 * @ctxt: an XML parser context
1865 *
1866 * parse Reference declarations
1867 *
1868 * [66] CharRef ::= '&#' [0-9]+ ';' |
1869 * '&#x' [0-9a-fA-F]+ ';'
1870 *
1871 * [ WFC: Legal Character ]
1872 * Characters referred to using character references must match the
1873 * production for Char.
1874 *
1875 * Returns the value parsed (as an int), 0 in case of error
1876 */
1877int
1878xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001879 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001881 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001882
Owen Taylor3473f882001-02-23 17:55:21 +00001883 /*
1884 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1885 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001886 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001887 (NXT(2) == 'x')) {
1888 SKIP(3);
1889 GROW;
1890 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001891 if (count++ > 20) {
1892 count = 0;
1893 GROW;
1894 }
1895 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001896 val = val * 16 + (CUR - '0');
1897 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1898 val = val * 16 + (CUR - 'a') + 10;
1899 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1900 val = val * 16 + (CUR - 'A') + 10;
1901 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001902 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 val = 0;
1904 break;
1905 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001906 if (val > 0x10FFFF)
1907 outofrange = val;
1908
Owen Taylor3473f882001-02-23 17:55:21 +00001909 NEXT;
1910 count++;
1911 }
1912 if (RAW == ';') {
1913 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001914 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001915 ctxt->nbChars ++;
1916 ctxt->input->cur++;
1917 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001918 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001919 SKIP(2);
1920 GROW;
1921 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001922 if (count++ > 20) {
1923 count = 0;
1924 GROW;
1925 }
1926 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001927 val = val * 10 + (CUR - '0');
1928 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001929 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001930 val = 0;
1931 break;
1932 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001933 if (val > 0x10FFFF)
1934 outofrange = val;
1935
Owen Taylor3473f882001-02-23 17:55:21 +00001936 NEXT;
1937 count++;
1938 }
1939 if (RAW == ';') {
1940 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001941 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001942 ctxt->nbChars ++;
1943 ctxt->input->cur++;
1944 }
1945 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 }
1948
1949 /*
1950 * [ WFC: Legal Character ]
1951 * Characters referred to using character references must match the
1952 * production for Char.
1953 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001954 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001955 return(val);
1956 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001957 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1958 "xmlParseCharRef: invalid xmlChar value %d\n",
1959 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 }
1961 return(0);
1962}
1963
1964/**
1965 * xmlParseStringCharRef:
1966 * @ctxt: an XML parser context
1967 * @str: a pointer to an index in the string
1968 *
1969 * parse Reference declarations, variant parsing from a string rather
1970 * than an an input flow.
1971 *
1972 * [66] CharRef ::= '&#' [0-9]+ ';' |
1973 * '&#x' [0-9a-fA-F]+ ';'
1974 *
1975 * [ WFC: Legal Character ]
1976 * Characters referred to using character references must match the
1977 * production for Char.
1978 *
1979 * Returns the value parsed (as an int), 0 in case of error, str will be
1980 * updated to the current value of the index
1981 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001982static int
Owen Taylor3473f882001-02-23 17:55:21 +00001983xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1984 const xmlChar *ptr;
1985 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001986 unsigned int val = 0;
1987 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001988
1989 if ((str == NULL) || (*str == NULL)) return(0);
1990 ptr = *str;
1991 cur = *ptr;
1992 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1993 ptr += 3;
1994 cur = *ptr;
1995 while (cur != ';') { /* Non input consuming loop */
1996 if ((cur >= '0') && (cur <= '9'))
1997 val = val * 16 + (cur - '0');
1998 else if ((cur >= 'a') && (cur <= 'f'))
1999 val = val * 16 + (cur - 'a') + 10;
2000 else if ((cur >= 'A') && (cur <= 'F'))
2001 val = val * 16 + (cur - 'A') + 10;
2002 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002003 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002004 val = 0;
2005 break;
2006 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002007 if (val > 0x10FFFF)
2008 outofrange = val;
2009
Owen Taylor3473f882001-02-23 17:55:21 +00002010 ptr++;
2011 cur = *ptr;
2012 }
2013 if (cur == ';')
2014 ptr++;
2015 } else if ((cur == '&') && (ptr[1] == '#')){
2016 ptr += 2;
2017 cur = *ptr;
2018 while (cur != ';') { /* Non input consuming loops */
2019 if ((cur >= '0') && (cur <= '9'))
2020 val = val * 10 + (cur - '0');
2021 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002022 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002023 val = 0;
2024 break;
2025 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002026 if (val > 0x10FFFF)
2027 outofrange = val;
2028
Owen Taylor3473f882001-02-23 17:55:21 +00002029 ptr++;
2030 cur = *ptr;
2031 }
2032 if (cur == ';')
2033 ptr++;
2034 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002035 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002036 return(0);
2037 }
2038 *str = ptr;
2039
2040 /*
2041 * [ WFC: Legal Character ]
2042 * Characters referred to using character references must match the
2043 * production for Char.
2044 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002045 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002046 return(val);
2047 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002048 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2049 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2050 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002051 }
2052 return(0);
2053}
2054
2055/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002056 * xmlNewBlanksWrapperInputStream:
2057 * @ctxt: an XML parser context
2058 * @entity: an Entity pointer
2059 *
2060 * Create a new input stream for wrapping
2061 * blanks around a PEReference
2062 *
2063 * Returns the new input stream or NULL
2064 */
2065
2066static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2067
Daniel Veillardf4862f02002-09-10 11:13:43 +00002068static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002069xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2070 xmlParserInputPtr input;
2071 xmlChar *buffer;
2072 size_t length;
2073 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2075 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002076 return(NULL);
2077 }
2078 if (xmlParserDebugEntities)
2079 xmlGenericError(xmlGenericErrorContext,
2080 "new blanks wrapper for entity: %s\n", entity->name);
2081 input = xmlNewInputStream(ctxt);
2082 if (input == NULL) {
2083 return(NULL);
2084 }
2085 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002086 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002087 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002088 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002089 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002090 return(NULL);
2091 }
2092 buffer [0] = ' ';
2093 buffer [1] = '%';
2094 buffer [length-3] = ';';
2095 buffer [length-2] = ' ';
2096 buffer [length-1] = 0;
2097 memcpy(buffer + 2, entity->name, length - 5);
2098 input->free = deallocblankswrapper;
2099 input->base = buffer;
2100 input->cur = buffer;
2101 input->length = length;
2102 input->end = &buffer[length];
2103 return(input);
2104}
2105
2106/**
Owen Taylor3473f882001-02-23 17:55:21 +00002107 * xmlParserHandlePEReference:
2108 * @ctxt: the parser context
2109 *
2110 * [69] PEReference ::= '%' Name ';'
2111 *
2112 * [ WFC: No Recursion ]
2113 * A parsed entity must not contain a recursive
2114 * reference to itself, either directly or indirectly.
2115 *
2116 * [ WFC: Entity Declared ]
2117 * In a document without any DTD, a document with only an internal DTD
2118 * subset which contains no parameter entity references, or a document
2119 * with "standalone='yes'", ... ... The declaration of a parameter
2120 * entity must precede any reference to it...
2121 *
2122 * [ VC: Entity Declared ]
2123 * In a document with an external subset or external parameter entities
2124 * with "standalone='no'", ... ... The declaration of a parameter entity
2125 * must precede any reference to it...
2126 *
2127 * [ WFC: In DTD ]
2128 * Parameter-entity references may only appear in the DTD.
2129 * NOTE: misleading but this is handled.
2130 *
2131 * A PEReference may have been detected in the current input stream
2132 * the handling is done accordingly to
2133 * http://www.w3.org/TR/REC-xml#entproc
2134 * i.e.
2135 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002136 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002137 */
2138void
2139xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002140 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002141 xmlEntityPtr entity = NULL;
2142 xmlParserInputPtr input;
2143
Owen Taylor3473f882001-02-23 17:55:21 +00002144 if (RAW != '%') return;
2145 switch(ctxt->instate) {
2146 case XML_PARSER_CDATA_SECTION:
2147 return;
2148 case XML_PARSER_COMMENT:
2149 return;
2150 case XML_PARSER_START_TAG:
2151 return;
2152 case XML_PARSER_END_TAG:
2153 return;
2154 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002155 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002156 return;
2157 case XML_PARSER_PROLOG:
2158 case XML_PARSER_START:
2159 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002160 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002161 return;
2162 case XML_PARSER_ENTITY_DECL:
2163 case XML_PARSER_CONTENT:
2164 case XML_PARSER_ATTRIBUTE_VALUE:
2165 case XML_PARSER_PI:
2166 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002167 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002168 /* we just ignore it there */
2169 return;
2170 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002171 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002172 return;
2173 case XML_PARSER_ENTITY_VALUE:
2174 /*
2175 * NOTE: in the case of entity values, we don't do the
2176 * substitution here since we need the literal
2177 * entity value to be able to save the internal
2178 * subset of the document.
2179 * This will be handled by xmlStringDecodeEntities
2180 */
2181 return;
2182 case XML_PARSER_DTD:
2183 /*
2184 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2185 * In the internal DTD subset, parameter-entity references
2186 * can occur only where markup declarations can occur, not
2187 * within markup declarations.
2188 * In that case this is handled in xmlParseMarkupDecl
2189 */
2190 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2191 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002192 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002194 break;
2195 case XML_PARSER_IGNORE:
2196 return;
2197 }
2198
2199 NEXT;
2200 name = xmlParseName(ctxt);
2201 if (xmlParserDebugEntities)
2202 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002203 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002204 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002205 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002206 } else {
2207 if (RAW == ';') {
2208 NEXT;
2209 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2210 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2211 if (entity == NULL) {
2212
2213 /*
2214 * [ WFC: Entity Declared ]
2215 * In a document without any DTD, a document with only an
2216 * internal DTD subset which contains no parameter entity
2217 * references, or a document with "standalone='yes'", ...
2218 * ... The declaration of a parameter entity must precede
2219 * any reference to it...
2220 */
2221 if ((ctxt->standalone == 1) ||
2222 ((ctxt->hasExternalSubset == 0) &&
2223 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002225 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002226 } else {
2227 /*
2228 * [ VC: Entity Declared ]
2229 * In a document with an external subset or external
2230 * parameter entities with "standalone='no'", ...
2231 * ... The declaration of a parameter entity must precede
2232 * any reference to it...
2233 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002234 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2235 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2236 "PEReference: %%%s; not found\n",
2237 name);
2238 } else
2239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2240 "PEReference: %%%s; not found\n",
2241 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 ctxt->valid = 0;
2243 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002244 } else if (ctxt->input->free != deallocblankswrapper) {
2245 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2246 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else {
2248 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2249 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002250 xmlChar start[4];
2251 xmlCharEncoding enc;
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 /*
2254 * handle the extra spaces added before and after
2255 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002256 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002257 */
2258 input = xmlNewEntityInputStream(ctxt, entity);
2259 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002260
2261 /*
2262 * Get the 4 first bytes and decode the charset
2263 * if enc != XML_CHAR_ENCODING_NONE
2264 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002265 * Note that, since we may have some non-UTF8
2266 * encoding (like UTF16, bug 135229), the 'length'
2267 * is not known, but we can calculate based upon
2268 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002269 */
2270 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002271 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002272 start[0] = RAW;
2273 start[1] = NXT(1);
2274 start[2] = NXT(2);
2275 start[3] = NXT(3);
2276 enc = xmlDetectCharEncoding(start, 4);
2277 if (enc != XML_CHAR_ENCODING_NONE) {
2278 xmlSwitchEncoding(ctxt, enc);
2279 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002280 }
2281
Owen Taylor3473f882001-02-23 17:55:21 +00002282 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002283 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2284 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002285 xmlParseTextDecl(ctxt);
2286 }
Owen Taylor3473f882001-02-23 17:55:21 +00002287 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002288 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2289 "PEReference: %s is not a parameter entity\n",
2290 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
2292 }
2293 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002294 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002295 }
Owen Taylor3473f882001-02-23 17:55:21 +00002296 }
2297}
2298
/*
 * growBuffer:
 * @buffer: name of an xmlChar * variable; an integer variable called
 *          <buffer>_size must be in scope next to it.
 *
 * Doubles <buffer>_size and reallocates @buffer to that many xmlChar.
 * When xmlRealloc() fails the macro jumps to a "mem_error" label which the
 * enclosing function has to provide; at that point @buffer still refers to
 * the old allocation while <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_new;                                            \
    buffer##_size = buffer##_size * 2;                                  \
    growBuffer_new = (xmlChar *)                                        \
        xmlRealloc(buffer, sizeof(xmlChar) * buffer##_size);            \
    if (growBuffer_new == NULL)                                         \
        goto mem_error;                                                 \
    buffer = growBuffer_new;                                            \
}
2310
2311/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002312 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002313 * @ctxt: the parser context
2314 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002315 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002316 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2317 * @end: an end marker xmlChar, 0 if none
2318 * @end2: an end marker xmlChar, 0 if none
2319 * @end3: an end marker xmlChar, 0 if none
2320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002321 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002322 *
2323 * [67] Reference ::= EntityRef | CharRef
2324 *
2325 * [69] PEReference ::= '%' Name ';'
2326 *
2327 * Returns A newly allocated string with the substitution done. The caller
2328 * must deallocate it !
2329 */
2330xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002331xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2332 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002333 xmlChar *buffer = NULL;
2334 int buffer_size = 0;
2335
2336 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002337 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002338 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 xmlEntityPtr ent;
2340 int c,l;
2341 int nbchars = 0;
2342
Daniel Veillarda82b1822004-11-08 16:24:57 +00002343 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002344 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002346
2347 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002348 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002349 return(NULL);
2350 }
2351
2352 /*
2353 * allocate a translation buffer.
2354 */
2355 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002356 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002357 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002358
2359 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002360 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002361 * we are operating on already parsed values.
2362 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002363 if (str < last)
2364 c = CUR_SCHAR(str, l);
2365 else
2366 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002367 while ((c != 0) && (c != end) && /* non input consuming loop */
2368 (c != end2) && (c != end3)) {
2369
2370 if (c == 0) break;
2371 if ((c == '&') && (str[1] == '#')) {
2372 int val = xmlParseStringCharRef(ctxt, &str);
2373 if (val != 0) {
2374 COPY_BUF(0,buffer,nbchars,val);
2375 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002376 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2377 growBuffer(buffer);
2378 }
Owen Taylor3473f882001-02-23 17:55:21 +00002379 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2380 if (xmlParserDebugEntities)
2381 xmlGenericError(xmlGenericErrorContext,
2382 "String decoding Entity Reference: %.30s\n",
2383 str);
2384 ent = xmlParseStringEntityRef(ctxt, &str);
2385 if ((ent != NULL) &&
2386 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2387 if (ent->content != NULL) {
2388 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002389 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2390 growBuffer(buffer);
2391 }
Owen Taylor3473f882001-02-23 17:55:21 +00002392 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002393 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2394 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002395 }
2396 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002397 ctxt->depth++;
2398 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2399 0, 0, 0);
2400 ctxt->depth--;
2401 if (rep != NULL) {
2402 current = rep;
2403 while (*current != 0) { /* non input consuming loop */
2404 buffer[nbchars++] = *current++;
2405 if (nbchars >
2406 buffer_size - XML_PARSER_BUFFER_SIZE) {
2407 growBuffer(buffer);
2408 }
2409 }
2410 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002411 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002412 }
2413 } else if (ent != NULL) {
2414 int i = xmlStrlen(ent->name);
2415 const xmlChar *cur = ent->name;
2416
2417 buffer[nbchars++] = '&';
2418 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2419 growBuffer(buffer);
2420 }
2421 for (;i > 0;i--)
2422 buffer[nbchars++] = *cur++;
2423 buffer[nbchars++] = ';';
2424 }
2425 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2426 if (xmlParserDebugEntities)
2427 xmlGenericError(xmlGenericErrorContext,
2428 "String decoding PE Reference: %.30s\n", str);
2429 ent = xmlParseStringPEReference(ctxt, &str);
2430 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002431 if (ent->content == NULL) {
2432 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2433 }
2434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 ctxt->depth++;
2436 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2437 0, 0, 0);
2438 ctxt->depth--;
2439 if (rep != NULL) {
2440 current = rep;
2441 while (*current != 0) { /* non input consuming loop */
2442 buffer[nbchars++] = *current++;
2443 if (nbchars >
2444 buffer_size - XML_PARSER_BUFFER_SIZE) {
2445 growBuffer(buffer);
2446 }
2447 }
2448 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002449 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002450 }
2451 }
2452 } else {
2453 COPY_BUF(l,buffer,nbchars,c);
2454 str += l;
2455 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2456 growBuffer(buffer);
2457 }
2458 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002459 if (str < last)
2460 c = CUR_SCHAR(str, l);
2461 else
2462 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002463 }
2464 buffer[nbchars++] = 0;
2465 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002466
2467mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002468 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002469 if (rep != NULL)
2470 xmlFree(rep);
2471 if (buffer != NULL)
2472 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002473 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002474}
2475
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476/**
2477 * xmlStringDecodeEntities:
2478 * @ctxt: the parser context
2479 * @str: the input string
2480 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2481 * @end: an end marker xmlChar, 0 if none
2482 * @end2: an end marker xmlChar, 0 if none
2483 * @end3: an end marker xmlChar, 0 if none
2484 *
2485 * Takes a entity string content and process to do the adequate substitutions.
2486 *
2487 * [67] Reference ::= EntityRef | CharRef
2488 *
2489 * [69] PEReference ::= '%' Name ';'
2490 *
2491 * Returns A newly allocated string with the substitution done. The caller
2492 * must deallocate it !
2493 */
2494xmlChar *
2495xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2496 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002497 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002498 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2499 end, end2, end3));
2500}
Owen Taylor3473f882001-02-23 17:55:21 +00002501
2502/************************************************************************
2503 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002504 * Commodity functions, cleanup needed ? *
2505 * *
2506 ************************************************************************/
2507
2508/**
2509 * areBlanks:
2510 * @ctxt: an XML parser context
2511 * @str: a xmlChar *
2512 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002513 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002514 *
2515 * Is this a sequence of blank chars that one can ignore ?
2516 *
2517 * Returns 1 if ignorable 0 otherwise.
2518 */
2519
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002520static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2521 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002522 int i, ret;
2523 xmlNodePtr lastChild;
2524
Daniel Veillard05c13a22001-09-09 08:38:09 +00002525 /*
2526 * Don't spend time trying to differentiate them, the same callback is
2527 * used !
2528 */
2529 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002530 return(0);
2531
Owen Taylor3473f882001-02-23 17:55:21 +00002532 /*
2533 * Check for xml:space value.
2534 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002535 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2536 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002537 return(0);
2538
2539 /*
2540 * Check that the string is made of blanks
2541 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002542 if (blank_chars == 0) {
2543 for (i = 0;i < len;i++)
2544 if (!(IS_BLANK_CH(str[i]))) return(0);
2545 }
Owen Taylor3473f882001-02-23 17:55:21 +00002546
2547 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002548 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002549 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002550 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 if (ctxt->myDoc != NULL) {
2552 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2553 if (ret == 0) return(1);
2554 if (ret == 1) return(0);
2555 }
2556
2557 /*
2558 * Otherwise, heuristic :-\
2559 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002560 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002561 if ((ctxt->node->children == NULL) &&
2562 (RAW == '<') && (NXT(1) == '/')) return(0);
2563
2564 lastChild = xmlGetLastChild(ctxt->node);
2565 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002566 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2567 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else if (xmlNodeIsText(lastChild))
2569 return(0);
2570 else if ((ctxt->node->children != NULL) &&
2571 (xmlNodeIsText(ctxt->node->children)))
2572 return(0);
2573 return(1);
2574}
2575
Owen Taylor3473f882001-02-23 17:55:21 +00002576/************************************************************************
2577 * *
2578 * Extra stuff for namespace support *
2579 * Relates to http://www.w3.org/TR/WD-xml-names *
2580 * *
2581 ************************************************************************/
2582
2583/**
2584 * xmlSplitQName:
2585 * @ctxt: an XML parser context
2586 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002588 *
2589 * parse an UTF8 encoded XML qualified name string
2590 *
2591 * [NS 5] QName ::= (Prefix ':')? LocalPart
2592 *
2593 * [NS 6] Prefix ::= NCName
2594 *
2595 * [NS 7] LocalPart ::= NCName
2596 *
2597 * Returns the local part, and prefix is updated
2598 * to get the Prefix if any.
2599 */
2600
2601xmlChar *
2602xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2603 xmlChar buf[XML_MAX_NAMELEN + 5];
2604 xmlChar *buffer = NULL;
2605 int len = 0;
2606 int max = XML_MAX_NAMELEN;
2607 xmlChar *ret = NULL;
2608 const xmlChar *cur = name;
2609 int c;
2610
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002611 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 *prefix = NULL;
2613
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002614 if (cur == NULL) return(NULL);
2615
Owen Taylor3473f882001-02-23 17:55:21 +00002616#ifndef XML_XML_NAMESPACE
2617 /* xml: prefix is not really a namespace */
2618 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2619 (cur[2] == 'l') && (cur[3] == ':'))
2620 return(xmlStrdup(name));
2621#endif
2622
Daniel Veillard597bc482003-07-24 16:08:28 +00002623 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002624 if (cur[0] == ':')
2625 return(xmlStrdup(name));
2626
2627 c = *cur++;
2628 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2629 buf[len++] = c;
2630 c = *cur++;
2631 }
2632 if (len >= max) {
2633 /*
2634 * Okay someone managed to make a huge name, so he's ready to pay
2635 * for the processing speed.
2636 */
2637 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002638
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002639 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002640 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002641 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002642 return(NULL);
2643 }
2644 memcpy(buffer, buf, len);
2645 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2646 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002647 xmlChar *tmp;
2648
Owen Taylor3473f882001-02-23 17:55:21 +00002649 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002650 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002651 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002652 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002653 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002654 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002655 return(NULL);
2656 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002657 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002658 }
2659 buffer[len++] = c;
2660 c = *cur++;
2661 }
2662 buffer[len] = 0;
2663 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002664
Daniel Veillard597bc482003-07-24 16:08:28 +00002665 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002666 if (buffer != NULL)
2667 xmlFree(buffer);
2668 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002669 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002670 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002671
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buffer == NULL)
2673 ret = xmlStrndup(buf, len);
2674 else {
2675 ret = buffer;
2676 buffer = NULL;
2677 max = XML_MAX_NAMELEN;
2678 }
2679
2680
2681 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002682 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002683 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002684 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002685 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002686 }
Owen Taylor3473f882001-02-23 17:55:21 +00002687 len = 0;
2688
Daniel Veillardbb284f42002-10-16 18:02:47 +00002689 /*
2690 * Check that the first character is proper to start
2691 * a new name
2692 */
2693 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2694 ((c >= 0x41) && (c <= 0x5A)) ||
2695 (c == '_') || (c == ':'))) {
2696 int l;
2697 int first = CUR_SCHAR(cur, l);
2698
2699 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002700 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002701 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002702 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002703 }
2704 }
2705 cur++;
2706
Owen Taylor3473f882001-02-23 17:55:21 +00002707 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2708 buf[len++] = c;
2709 c = *cur++;
2710 }
2711 if (len >= max) {
2712 /*
2713 * Okay someone managed to make a huge name, so he's ready to pay
2714 * for the processing speed.
2715 */
2716 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002717
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002718 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002719 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002720 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002721 return(NULL);
2722 }
2723 memcpy(buffer, buf, len);
2724 while (c != 0) { /* tested bigname2.xml */
2725 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002726 xmlChar *tmp;
2727
Owen Taylor3473f882001-02-23 17:55:21 +00002728 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002729 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002730 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002731 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002732 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002733 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002734 return(NULL);
2735 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002736 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002737 }
2738 buffer[len++] = c;
2739 c = *cur++;
2740 }
2741 buffer[len] = 0;
2742 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002743
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if (buffer == NULL)
2745 ret = xmlStrndup(buf, len);
2746 else {
2747 ret = buffer;
2748 }
2749 }
2750
2751 return(ret);
2752}
2753
2754/************************************************************************
2755 * *
2756 * The parser itself *
2757 * Relates to http://www.w3.org/TR/REC-xml *
2758 * *
2759 ************************************************************************/
2760
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002761static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002762static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002763 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002764
Owen Taylor3473f882001-02-23 17:55:21 +00002765/**
2766 * xmlParseName:
2767 * @ctxt: an XML parser context
2768 *
2769 * parse an XML name.
2770 *
2771 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2772 * CombiningChar | Extender
2773 *
2774 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2775 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002776 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002777 *
2778 * Returns the Name parsed or NULL
2779 */
2780
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002781const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002782xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002783 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002784 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002785 int count = 0;
2786
2787 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002788
2789 /*
2790 * Accelerator for simple ASCII names
2791 */
2792 in = ctxt->input->cur;
2793 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2794 ((*in >= 0x41) && (*in <= 0x5A)) ||
2795 (*in == '_') || (*in == ':')) {
2796 in++;
2797 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2798 ((*in >= 0x41) && (*in <= 0x5A)) ||
2799 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002800 (*in == '_') || (*in == '-') ||
2801 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002802 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002803 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002804 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002805 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002806 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002807 ctxt->nbChars += count;
2808 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002809 if (ret == NULL)
2810 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002811 return(ret);
2812 }
2813 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002814 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002815}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002816
Daniel Veillard46de64e2002-05-29 08:21:33 +00002817/**
2818 * xmlParseNameAndCompare:
2819 * @ctxt: an XML parser context
2820 *
2821 * parse an XML name and compares for match
2822 * (specialized for endtag parsing)
2823 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002824 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2825 * and the name for mismatch
2826 */
2827
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002828static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002829xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002830 register const xmlChar *cmp = other;
2831 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002832 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002833
2834 GROW;
2835
2836 in = ctxt->input->cur;
2837 while (*in != 0 && *in == *cmp) {
2838 ++in;
2839 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002840 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002841 }
William M. Brack76e95df2003-10-18 16:20:14 +00002842 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002843 /* success */
2844 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002845 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002846 }
2847 /* failure (or end of input buffer), check with full function */
2848 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002849 /* strings coming from the dictionnary direct compare possible */
2850 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002851 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002852 }
2853 return ret;
2854}
2855
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002856static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002857xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002858 int len = 0, l;
2859 int c;
2860 int count = 0;
2861
2862 /*
2863 * Handler for more complex cases
2864 */
2865 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002866 c = CUR_CHAR(l);
2867 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2868 (!IS_LETTER(c) && (c != '_') &&
2869 (c != ':'))) {
2870 return(NULL);
2871 }
2872
2873 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002874 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002875 (c == '.') || (c == '-') ||
2876 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002877 (IS_COMBINING(c)) ||
2878 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002879 if (count++ > 100) {
2880 count = 0;
2881 GROW;
2882 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002883 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002884 NEXTL(l);
2885 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
Daniel Veillard96688262005-08-23 18:14:12 +00002887 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2888 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002889 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002890}
2891
2892/**
2893 * xmlParseStringName:
2894 * @ctxt: an XML parser context
2895 * @str: a pointer to the string pointer (IN/OUT)
2896 *
2897 * parse an XML name.
2898 *
2899 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2900 * CombiningChar | Extender
2901 *
2902 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2903 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002904 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002905 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002906 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002907 * is updated to the current location in the string.
2908 */
2909
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002910static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002911xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2912 xmlChar buf[XML_MAX_NAMELEN + 5];
2913 const xmlChar *cur = *str;
2914 int len = 0, l;
2915 int c;
2916
2917 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002918 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002919 (c != ':')) {
2920 return(NULL);
2921 }
2922
William M. Brack871611b2003-10-18 04:53:14 +00002923 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002924 (c == '.') || (c == '-') ||
2925 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002926 (IS_COMBINING(c)) ||
2927 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002928 COPY_BUF(l,buf,len,c);
2929 cur += l;
2930 c = CUR_SCHAR(cur, l);
2931 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2932 /*
2933 * Okay someone managed to make a huge name, so he's ready to pay
2934 * for the processing speed.
2935 */
2936 xmlChar *buffer;
2937 int max = len * 2;
2938
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002939 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002940 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002941 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002942 return(NULL);
2943 }
2944 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002946 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002947 (c == '.') || (c == '-') ||
2948 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002949 (IS_COMBINING(c)) ||
2950 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002951 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002952 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002953 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002954 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002957 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002958 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002959 return(NULL);
2960 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002961 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002962 }
2963 COPY_BUF(l,buffer,len,c);
2964 cur += l;
2965 c = CUR_SCHAR(cur, l);
2966 }
2967 buffer[len] = 0;
2968 *str = cur;
2969 return(buffer);
2970 }
2971 }
2972 *str = cur;
2973 return(xmlStrndup(buf, len));
2974}
2975
2976/**
2977 * xmlParseNmtoken:
2978 * @ctxt: an XML parser context
2979 *
2980 * parse an XML Nmtoken.
2981 *
2982 * [7] Nmtoken ::= (NameChar)+
2983 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002984 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002985 *
2986 * Returns the Nmtoken parsed or NULL
2987 */
2988
2989xmlChar *
2990xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2991 xmlChar buf[XML_MAX_NAMELEN + 5];
2992 int len = 0, l;
2993 int c;
2994 int count = 0;
2995
2996 GROW;
2997 c = CUR_CHAR(l);
2998
William M. Brack871611b2003-10-18 04:53:14 +00002999 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003000 (c == '.') || (c == '-') ||
3001 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003002 (IS_COMBINING(c)) ||
3003 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (count++ > 100) {
3005 count = 0;
3006 GROW;
3007 }
3008 COPY_BUF(l,buf,len,c);
3009 NEXTL(l);
3010 c = CUR_CHAR(l);
3011 if (len >= XML_MAX_NAMELEN) {
3012 /*
3013 * Okay someone managed to make a huge token, so he's ready to pay
3014 * for the processing speed.
3015 */
3016 xmlChar *buffer;
3017 int max = len * 2;
3018
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003019 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003020 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003021 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003022 return(NULL);
3023 }
3024 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003025 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003026 (c == '.') || (c == '-') ||
3027 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003028 (IS_COMBINING(c)) ||
3029 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003030 if (count++ > 100) {
3031 count = 0;
3032 GROW;
3033 }
3034 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003035 xmlChar *tmp;
3036
Owen Taylor3473f882001-02-23 17:55:21 +00003037 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003039 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003040 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003041 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003042 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 return(NULL);
3044 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003045 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 }
3047 COPY_BUF(l,buffer,len,c);
3048 NEXTL(l);
3049 c = CUR_CHAR(l);
3050 }
3051 buffer[len] = 0;
3052 return(buffer);
3053 }
3054 }
3055 if (len == 0)
3056 return(NULL);
3057 return(xmlStrndup(buf, len));
3058}
3059
3060/**
3061 * xmlParseEntityValue:
3062 * @ctxt: an XML parser context
3063 * @orig: if non-NULL store a copy of the original entity value
3064 *
3065 * parse a value for ENTITY declarations
3066 *
3067 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3068 * "'" ([^%&'] | PEReference | Reference)* "'"
3069 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003070 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003071 */
3072
3073xmlChar *
3074xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3075 xmlChar *buf = NULL;
3076 int len = 0;
3077 int size = XML_PARSER_BUFFER_SIZE;
3078 int c, l;
3079 xmlChar stop;
3080 xmlChar *ret = NULL;
3081 const xmlChar *cur = NULL;
3082 xmlParserInputPtr input;
3083
3084 if (RAW == '"') stop = '"';
3085 else if (RAW == '\'') stop = '\'';
3086 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003087 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 return(NULL);
3089 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003090 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003091 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003092 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003093 return(NULL);
3094 }
3095
3096 /*
3097 * The content of the entity definition is copied in a buffer.
3098 */
3099
3100 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3101 input = ctxt->input;
3102 GROW;
3103 NEXT;
3104 c = CUR_CHAR(l);
3105 /*
3106 * NOTE: 4.4.5 Included in Literal
3107 * When a parameter entity reference appears in a literal entity
3108 * value, ... a single or double quote character in the replacement
3109 * text is always treated as a normal data character and will not
3110 * terminate the literal.
3111 * In practice it means we stop the loop only when back at parsing
3112 * the initial entity and the quote is found
3113 */
William M. Brack871611b2003-10-18 04:53:14 +00003114 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003115 (ctxt->input != input))) {
3116 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003117 xmlChar *tmp;
3118
Owen Taylor3473f882001-02-23 17:55:21 +00003119 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003120 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3121 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003122 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003123 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003124 return(NULL);
3125 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003126 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 }
3128 COPY_BUF(l,buf,len,c);
3129 NEXTL(l);
3130 /*
3131 * Pop-up of finished entities.
3132 */
3133 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3134 xmlPopInput(ctxt);
3135
3136 GROW;
3137 c = CUR_CHAR(l);
3138 if (c == 0) {
3139 GROW;
3140 c = CUR_CHAR(l);
3141 }
3142 }
3143 buf[len] = 0;
3144
3145 /*
3146 * Raise problem w.r.t. '&' and '%' being used in non-entities
3147 * reference constructs. Note Charref will be handled in
3148 * xmlStringDecodeEntities()
3149 */
3150 cur = buf;
3151 while (*cur != 0) { /* non input consuming */
3152 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3153 xmlChar *name;
3154 xmlChar tmp = *cur;
3155
3156 cur++;
3157 name = xmlParseStringName(ctxt, &cur);
3158 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003159 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003160 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003161 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003162 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003163 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3164 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003165 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003166 }
3167 if (name != NULL)
3168 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003169 if (*cur == 0)
3170 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003171 }
3172 cur++;
3173 }
3174
3175 /*
3176 * Then PEReference entities are substituted.
3177 */
3178 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003179 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003180 xmlFree(buf);
3181 } else {
3182 NEXT;
3183 /*
3184 * NOTE: 4.4.7 Bypassed
3185 * When a general entity reference appears in the EntityValue in
3186 * an entity declaration, it is bypassed and left as is.
3187 * so XML_SUBSTITUTE_REF is not set here.
3188 */
3189 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3190 0, 0, 0);
3191 if (orig != NULL)
3192 *orig = buf;
3193 else
3194 xmlFree(buf);
3195 }
3196
3197 return(ret);
3198}
3199
3200/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003201 * xmlParseAttValueComplex:
3202 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003203 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003205 *
3206 * parse a value for an attribute, this is the fallback function
3207 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003208 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003209 *
3210 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3211 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003212static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003213xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003214 xmlChar limit = 0;
3215 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003216 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003217 int len = 0;
3218 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003219 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003220 xmlChar *current = NULL;
3221 xmlEntityPtr ent;
3222
Owen Taylor3473f882001-02-23 17:55:21 +00003223 if (NXT(0) == '"') {
3224 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3225 limit = '"';
3226 NEXT;
3227 } else if (NXT(0) == '\'') {
3228 limit = '\'';
3229 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3230 NEXT;
3231 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003232 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003233 return(NULL);
3234 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003235
Owen Taylor3473f882001-02-23 17:55:21 +00003236 /*
3237 * allocate a translation buffer.
3238 */
3239 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003240 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003241 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003242
3243 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003244 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003245 */
3246 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003247 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003248 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003249 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003250 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003251 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003252 if (NXT(1) == '#') {
3253 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003254
Owen Taylor3473f882001-02-23 17:55:21 +00003255 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003256 if (ctxt->replaceEntities) {
3257 if (len > buf_size - 10) {
3258 growBuffer(buf);
3259 }
3260 buf[len++] = '&';
3261 } else {
3262 /*
3263 * The reparsing will be done in xmlStringGetNodeList()
3264 * called by the attribute() function in SAX.c
3265 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003266 if (len > buf_size - 10) {
3267 growBuffer(buf);
3268 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003269 buf[len++] = '&';
3270 buf[len++] = '#';
3271 buf[len++] = '3';
3272 buf[len++] = '8';
3273 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003274 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003275 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003276 if (len > buf_size - 10) {
3277 growBuffer(buf);
3278 }
Owen Taylor3473f882001-02-23 17:55:21 +00003279 len += xmlCopyChar(0, &buf[len], val);
3280 }
3281 } else {
3282 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003283 if ((ent != NULL) &&
3284 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3285 if (len > buf_size - 10) {
3286 growBuffer(buf);
3287 }
3288 if ((ctxt->replaceEntities == 0) &&
3289 (ent->content[0] == '&')) {
3290 buf[len++] = '&';
3291 buf[len++] = '#';
3292 buf[len++] = '3';
3293 buf[len++] = '8';
3294 buf[len++] = ';';
3295 } else {
3296 buf[len++] = ent->content[0];
3297 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003298 } else if ((ent != NULL) &&
3299 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003300 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3301 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003302 XML_SUBSTITUTE_REF,
3303 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (rep != NULL) {
3305 current = rep;
3306 while (*current != 0) { /* non input consuming */
3307 buf[len++] = *current++;
3308 if (len > buf_size - 10) {
3309 growBuffer(buf);
3310 }
3311 }
3312 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003313 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003316 if (len > buf_size - 10) {
3317 growBuffer(buf);
3318 }
Owen Taylor3473f882001-02-23 17:55:21 +00003319 if (ent->content != NULL)
3320 buf[len++] = ent->content[0];
3321 }
3322 } else if (ent != NULL) {
3323 int i = xmlStrlen(ent->name);
3324 const xmlChar *cur = ent->name;
3325
3326 /*
3327 * This may look absurd but is needed to detect
3328 * entities problems
3329 */
3330 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3331 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003332 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003333 XML_SUBSTITUTE_REF, 0, 0, 0);
3334 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003336 rep = NULL;
3337 }
Owen Taylor3473f882001-02-23 17:55:21 +00003338 }
3339
3340 /*
3341 * Just output the reference
3342 */
3343 buf[len++] = '&';
3344 if (len > buf_size - i - 10) {
3345 growBuffer(buf);
3346 }
3347 for (;i > 0;i--)
3348 buf[len++] = *cur++;
3349 buf[len++] = ';';
3350 }
3351 }
3352 } else {
3353 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003354 if ((len != 0) || (!normalize)) {
3355 if ((!normalize) || (!in_space)) {
3356 COPY_BUF(l,buf,len,0x20);
3357 if (len > buf_size - 10) {
3358 growBuffer(buf);
3359 }
3360 }
3361 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 }
3363 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003364 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003365 COPY_BUF(l,buf,len,c);
3366 if (len > buf_size - 10) {
3367 growBuffer(buf);
3368 }
3369 }
3370 NEXTL(l);
3371 }
3372 GROW;
3373 c = CUR_CHAR(l);
3374 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003375 if ((in_space) && (normalize)) {
3376 while (buf[len - 1] == 0x20) len--;
3377 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003378 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003380 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003382 if ((c != 0) && (!IS_CHAR(c))) {
3383 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3384 "invalid character in attribute value\n");
3385 } else {
3386 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3387 "AttValue: ' expected\n");
3388 }
Owen Taylor3473f882001-02-23 17:55:21 +00003389 } else
3390 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003391 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003393
3394mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003395 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003396 if (buf != NULL)
3397 xmlFree(buf);
3398 if (rep != NULL)
3399 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003400 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003401}
3402
3403/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003404 * xmlParseAttValue:
3405 * @ctxt: an XML parser context
3406 *
3407 * parse a value for an attribute
3408 * Note: the parser won't do substitution of entities here, this
3409 * will be handled later in xmlStringGetNodeList
3410 *
3411 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3412 * "'" ([^<&'] | Reference)* "'"
3413 *
3414 * 3.3.3 Attribute-Value Normalization:
3415 * Before the value of an attribute is passed to the application or
3416 * checked for validity, the XML processor must normalize it as follows:
3417 * - a character reference is processed by appending the referenced
3418 * character to the attribute value
3419 * - an entity reference is processed by recursively processing the
3420 * replacement text of the entity
3421 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3422 * appending #x20 to the normalized value, except that only a single
3423 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3424 * parsed entity or the literal entity value of an internal parsed entity
3425 * - other characters are processed by appending them to the normalized value
3426 * If the declared value is not CDATA, then the XML processor must further
3427 * process the normalized attribute value by discarding any leading and
3428 * trailing space (#x20) characters, and by replacing sequences of space
3429 * (#x20) characters by a single space (#x20) character.
3430 * All attributes for which no declaration has been read should be treated
3431 * by a non-validating parser as if declared CDATA.
3432 *
3433 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3434 */
3435
3436
3437xmlChar *
3438xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003439 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003440 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003441}
3442
3443/**
Owen Taylor3473f882001-02-23 17:55:21 +00003444 * xmlParseSystemLiteral:
3445 * @ctxt: an XML parser context
3446 *
3447 * parse an XML Literal
3448 *
3449 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3450 *
3451 * Returns the SystemLiteral parsed or NULL
3452 */
3453
3454xmlChar *
3455xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3456 xmlChar *buf = NULL;
3457 int len = 0;
3458 int size = XML_PARSER_BUFFER_SIZE;
3459 int cur, l;
3460 xmlChar stop;
3461 int state = ctxt->instate;
3462 int count = 0;
3463
3464 SHRINK;
3465 if (RAW == '"') {
3466 NEXT;
3467 stop = '"';
3468 } else if (RAW == '\'') {
3469 NEXT;
3470 stop = '\'';
3471 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003472 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003473 return(NULL);
3474 }
3475
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003476 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003477 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003478 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003479 return(NULL);
3480 }
3481 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3482 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003483 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003484 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003485 xmlChar *tmp;
3486
Owen Taylor3473f882001-02-23 17:55:21 +00003487 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003488 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3489 if (tmp == NULL) {
3490 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003491 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003492 ctxt->instate = (xmlParserInputState) state;
3493 return(NULL);
3494 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003495 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003496 }
3497 count++;
3498 if (count > 50) {
3499 GROW;
3500 count = 0;
3501 }
3502 COPY_BUF(l,buf,len,cur);
3503 NEXTL(l);
3504 cur = CUR_CHAR(l);
3505 if (cur == 0) {
3506 GROW;
3507 SHRINK;
3508 cur = CUR_CHAR(l);
3509 }
3510 }
3511 buf[len] = 0;
3512 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003513 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003514 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003515 } else {
3516 NEXT;
3517 }
3518 return(buf);
3519}
3520
3521/**
3522 * xmlParsePubidLiteral:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse an XML public literal
3526 *
3527 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3528 *
3529 * Returns the PubidLiteral parsed or NULL.
3530 */
3531
3532xmlChar *
3533xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3534 xmlChar *buf = NULL;
3535 int len = 0;
3536 int size = XML_PARSER_BUFFER_SIZE;
3537 xmlChar cur;
3538 xmlChar stop;
3539 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003540 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003541
3542 SHRINK;
3543 if (RAW == '"') {
3544 NEXT;
3545 stop = '"';
3546 } else if (RAW == '\'') {
3547 NEXT;
3548 stop = '\'';
3549 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003550 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003551 return(NULL);
3552 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003553 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003554 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003555 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003556 return(NULL);
3557 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003558 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003560 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003561 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003562 xmlChar *tmp;
3563
Owen Taylor3473f882001-02-23 17:55:21 +00003564 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003565 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3566 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003567 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003568 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 return(NULL);
3570 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003571 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003572 }
3573 buf[len++] = cur;
3574 count++;
3575 if (count > 50) {
3576 GROW;
3577 count = 0;
3578 }
3579 NEXT;
3580 cur = CUR;
3581 if (cur == 0) {
3582 GROW;
3583 SHRINK;
3584 cur = CUR;
3585 }
3586 }
3587 buf[len] = 0;
3588 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003589 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003590 } else {
3591 NEXT;
3592 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003593 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003594 return(buf);
3595}
3596
Daniel Veillard48b2f892001-02-25 16:11:03 +00003597void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003598
/*
 * Lookup table used by the inner loop of the char data scanning.
 * An entry is non-zero (the byte value itself) for the bytes which can
 * be skipped directly: 0x09 and 0x20-0x7F except '&', '<' and ']'.
 * Everything else, including CR, LF and all non-ASCII bytes, is zero and
 * stops the fast scan.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3636
Owen Taylor3473f882001-02-23 17:55:21 +00003637/**
3638 * xmlParseCharData:
3639 * @ctxt: an XML parser context
3640 * @cdata: int indicating whether we are within a CDATA section
3641 *
3642 * parse a CharData section.
3643 * if we are within a CDATA section ']]>' marks an end of section.
3644 *
3645 * The right angle bracket (>) may be represented using the string "&gt;",
3646 * and must, for compatibility, be escaped using "&gt;" or a character
3647 * reference when it appears in the string "]]>" in content, when that
3648 * string is not marking the end of a CDATA section.
3649 *
3650 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3651 */
3652
3653void
3654xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003655 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003656 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003657 int line = ctxt->input->line;
3658 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003659 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003660
3661 SHRINK;
3662 GROW;
3663 /*
3664 * Accelerated common case where input don't need to be
3665 * modified before passing it to the handler.
3666 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003667 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003668 in = ctxt->input->cur;
3669 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003670get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003671 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003672 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003673 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003674 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003675 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003676 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003677 goto get_more_space;
3678 }
3679 if (*in == '<') {
3680 nbchar = in - ctxt->input->cur;
3681 if (nbchar > 0) {
3682 const xmlChar *tmp = ctxt->input->cur;
3683 ctxt->input->cur = in;
3684
Daniel Veillard34099b42004-11-04 17:34:35 +00003685 if ((ctxt->sax != NULL) &&
3686 (ctxt->sax->ignorableWhitespace !=
3687 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003688 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003689 if (ctxt->sax->ignorableWhitespace != NULL)
3690 ctxt->sax->ignorableWhitespace(ctxt->userData,
3691 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003692 } else {
3693 if (ctxt->sax->characters != NULL)
3694 ctxt->sax->characters(ctxt->userData,
3695 tmp, nbchar);
3696 if (*ctxt->space == -1)
3697 *ctxt->space = -2;
3698 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003699 } else if ((ctxt->sax != NULL) &&
3700 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003701 ctxt->sax->characters(ctxt->userData,
3702 tmp, nbchar);
3703 }
3704 }
3705 return;
3706 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003707
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003708get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003709 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003710 while (test_char_data[*in]) {
3711 in++;
3712 ccol++;
3713 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003714 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003715 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003716 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003717 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003718 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003719 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003720 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003721 }
3722 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003723 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003724 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003725 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003726 return;
3727 }
3728 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003729 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003730 goto get_more;
3731 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003732 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003733 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003734 if ((ctxt->sax != NULL) &&
3735 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003736 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003737 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003738 const xmlChar *tmp = ctxt->input->cur;
3739 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003740
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003741 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003742 if (ctxt->sax->ignorableWhitespace != NULL)
3743 ctxt->sax->ignorableWhitespace(ctxt->userData,
3744 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003745 } else {
3746 if (ctxt->sax->characters != NULL)
3747 ctxt->sax->characters(ctxt->userData,
3748 tmp, nbchar);
3749 if (*ctxt->space == -1)
3750 *ctxt->space = -2;
3751 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003752 line = ctxt->input->line;
3753 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003754 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003755 if (ctxt->sax->characters != NULL)
3756 ctxt->sax->characters(ctxt->userData,
3757 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003758 line = ctxt->input->line;
3759 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003760 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003761 }
3762 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003763 if (*in == 0xD) {
3764 in++;
3765 if (*in == 0xA) {
3766 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003767 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003768 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003769 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003770 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003771 in--;
3772 }
3773 if (*in == '<') {
3774 return;
3775 }
3776 if (*in == '&') {
3777 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003778 }
3779 SHRINK;
3780 GROW;
3781 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003782 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003783 nbchar = 0;
3784 }
Daniel Veillard50582112001-03-26 22:52:16 +00003785 ctxt->input->line = line;
3786 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003787 xmlParseCharDataComplex(ctxt, cdata);
3788}
3789
Daniel Veillard01c13b52002-12-10 15:19:08 +00003790/**
3791 * xmlParseCharDataComplex:
3792 * @ctxt: an XML parser context
3793 * @cdata: int indicating whether we are within a CDATA section
3794 *
3795 * parse a CharData section.this is the fallback function
3796 * of xmlParseCharData() when the parsing requires handling
3797 * of non-ASCII characters.
3798 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003799void
3800xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003801 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3802 int nbchar = 0;
3803 int cur, l;
3804 int count = 0;
3805
3806 SHRINK;
3807 GROW;
3808 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003809 while ((cur != '<') && /* checked */
3810 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003811 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003812 if ((cur == ']') && (NXT(1) == ']') &&
3813 (NXT(2) == '>')) {
3814 if (cdata) break;
3815 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003816 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003817 }
3818 }
3819 COPY_BUF(l,buf,nbchar,cur);
3820 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003821 buf[nbchar] = 0;
3822
Owen Taylor3473f882001-02-23 17:55:21 +00003823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003824 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003825 */
3826 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003827 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003828 if (ctxt->sax->ignorableWhitespace != NULL)
3829 ctxt->sax->ignorableWhitespace(ctxt->userData,
3830 buf, nbchar);
3831 } else {
3832 if (ctxt->sax->characters != NULL)
3833 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003834 if ((ctxt->sax->characters !=
3835 ctxt->sax->ignorableWhitespace) &&
3836 (*ctxt->space == -1))
3837 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 }
3839 }
3840 nbchar = 0;
3841 }
3842 count++;
3843 if (count > 50) {
3844 GROW;
3845 count = 0;
3846 }
3847 NEXTL(l);
3848 cur = CUR_CHAR(l);
3849 }
3850 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003851 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003852 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003853 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003854 */
3855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003856 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (ctxt->sax->ignorableWhitespace != NULL)
3858 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3859 } else {
3860 if (ctxt->sax->characters != NULL)
3861 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003862 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3863 (*ctxt->space == -1))
3864 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
3866 }
3867 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003868 if ((cur != 0) && (!IS_CHAR(cur))) {
3869 /* Generate the error and skip the offending character */
3870 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3871 "PCDATA invalid Char value %d\n",
3872 cur);
3873 NEXTL(l);
3874 }
Owen Taylor3473f882001-02-23 17:55:21 +00003875}
3876
3877/**
3878 * xmlParseExternalID:
3879 * @ctxt: an XML parser context
3880 * @publicID: a xmlChar** receiving PubidLiteral
3881 * @strict: indicate whether we should restrict parsing to only
3882 * production [75], see NOTE below
3883 *
3884 * Parse an External ID or a Public ID
3885 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003886 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003887 * 'PUBLIC' S PubidLiteral S SystemLiteral
3888 *
3889 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3890 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3891 *
3892 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3893 *
3894 * Returns the function returns SystemLiteral and in the second
3895 * case publicID receives PubidLiteral, is strict is off
3896 * it is possible to return NULL and have publicID set.
3897 */
3898
3899xmlChar *
3900xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3901 xmlChar *URI = NULL;
3902
3903 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003904
3905 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003906 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003907 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003908 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3910 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003911 }
3912 SKIP_BLANKS;
3913 URI = xmlParseSystemLiteral(ctxt);
3914 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003915 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003916 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003917 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003918 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003921 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003922 }
3923 SKIP_BLANKS;
3924 *publicID = xmlParsePubidLiteral(ctxt);
3925 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003926 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 }
3928 if (strict) {
3929 /*
3930 * We don't handle [83] so "S SystemLiteral" is required.
3931 */
William M. Brack76e95df2003-10-18 16:20:14 +00003932 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003934 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003935 }
3936 } else {
3937 /*
3938 * We handle [83] so we return immediately, if
3939 * "S SystemLiteral" is not detected. From a purely parsing
3940 * point of view that's a nice mess.
3941 */
3942 const xmlChar *ptr;
3943 GROW;
3944
3945 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003946 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003947
William M. Brack76e95df2003-10-18 16:20:14 +00003948 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003949 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3950 }
3951 SKIP_BLANKS;
3952 URI = xmlParseSystemLiteral(ctxt);
3953 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003954 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003955 }
3956 }
3957 return(URI);
3958}
3959
3960/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003961 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003962 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003963 * @buf: the already parsed part of the buffer
3964 * @len: number of bytes filles in the buffer
3965 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003966 *
3967 * Skip an XML (SGML) comment <!-- .... -->
3968 * The spec says that "For compatibility, the string "--" (double-hyphen)
3969 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003970 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003971 *
3972 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3973 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003974static void
3975xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003976 int q, ql;
3977 int r, rl;
3978 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003979 xmlParserInputPtr input = ctxt->input;
3980 int count = 0;
3981
Owen Taylor3473f882001-02-23 17:55:21 +00003982 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003983 len = 0;
3984 size = XML_PARSER_BUFFER_SIZE;
3985 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3986 if (buf == NULL) {
3987 xmlErrMemory(ctxt, NULL);
3988 return;
3989 }
Owen Taylor3473f882001-02-23 17:55:21 +00003990 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003991 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003992 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003993 if (q == 0)
3994 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003995 if (!IS_CHAR(q)) {
3996 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3997 "xmlParseComment: invalid xmlChar value %d\n",
3998 q);
3999 xmlFree (buf);
4000 return;
4001 }
Owen Taylor3473f882001-02-23 17:55:21 +00004002 NEXTL(ql);
4003 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004004 if (r == 0)
4005 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004006 if (!IS_CHAR(r)) {
4007 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4008 "xmlParseComment: invalid xmlChar value %d\n",
4009 q);
4010 xmlFree (buf);
4011 return;
4012 }
Owen Taylor3473f882001-02-23 17:55:21 +00004013 NEXTL(rl);
4014 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004015 if (cur == 0)
4016 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004017 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004018 ((cur != '>') ||
4019 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004020 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004021 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004022 }
4023 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004024 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004025 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004026 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4027 if (new_buf == NULL) {
4028 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004029 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004030 return;
4031 }
William M. Bracka3215c72004-07-31 16:24:01 +00004032 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004033 }
4034 COPY_BUF(ql,buf,len,q);
4035 q = r;
4036 ql = rl;
4037 r = cur;
4038 rl = l;
4039
4040 count++;
4041 if (count > 50) {
4042 GROW;
4043 count = 0;
4044 }
4045 NEXTL(l);
4046 cur = CUR_CHAR(l);
4047 if (cur == 0) {
4048 SHRINK;
4049 GROW;
4050 cur = CUR_CHAR(l);
4051 }
4052 }
4053 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004054 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004055 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004056 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004057 } else if (!IS_CHAR(cur)) {
4058 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4059 "xmlParseComment: invalid xmlChar value %d\n",
4060 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004061 } else {
4062 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004063 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4064 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004065 }
4066 NEXT;
4067 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4068 (!ctxt->disableSAX))
4069 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
Daniel Veillardda629342007-08-01 07:49:06 +00004071 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004072 return;
4073not_terminated:
4074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4075 "Comment not terminated\n", NULL);
4076 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004077 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004078}
Daniel Veillardda629342007-08-01 07:49:06 +00004079
Daniel Veillard4c778d82005-01-23 17:37:44 +00004080/**
4081 * xmlParseComment:
4082 * @ctxt: an XML parser context
4083 *
4084 * Skip an XML (SGML) comment <!-- .... -->
4085 * The spec says that "For compatibility, the string "--" (double-hyphen)
4086 * must not occur within comments. "
4087 *
4088 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4089 */
4090void
4091xmlParseComment(xmlParserCtxtPtr ctxt) {
4092 xmlChar *buf = NULL;
4093 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004094 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004095 xmlParserInputState state;
4096 const xmlChar *in;
4097 int nbchar = 0, ccol;
4098
4099 /*
4100 * Check that there is a comment right here.
4101 */
4102 if ((RAW != '<') || (NXT(1) != '!') ||
4103 (NXT(2) != '-') || (NXT(3) != '-')) return;
4104
4105 state = ctxt->instate;
4106 ctxt->instate = XML_PARSER_COMMENT;
4107 SKIP(4);
4108 SHRINK;
4109 GROW;
4110
4111 /*
4112 * Accelerated common case where input don't need to be
4113 * modified before passing it to the handler.
4114 */
4115 in = ctxt->input->cur;
4116 do {
4117 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004118 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004119 ctxt->input->line++; ctxt->input->col = 1;
4120 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004121 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004122 }
4123get_more:
4124 ccol = ctxt->input->col;
4125 while (((*in > '-') && (*in <= 0x7F)) ||
4126 ((*in >= 0x20) && (*in < '-')) ||
4127 (*in == 0x09)) {
4128 in++;
4129 ccol++;
4130 }
4131 ctxt->input->col = ccol;
4132 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004133 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004134 ctxt->input->line++; ctxt->input->col = 1;
4135 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004136 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004137 goto get_more;
4138 }
4139 nbchar = in - ctxt->input->cur;
4140 /*
4141 * save current set of data
4142 */
4143 if (nbchar > 0) {
4144 if ((ctxt->sax != NULL) &&
4145 (ctxt->sax->comment != NULL)) {
4146 if (buf == NULL) {
4147 if ((*in == '-') && (in[1] == '-'))
4148 size = nbchar + 1;
4149 else
4150 size = XML_PARSER_BUFFER_SIZE + nbchar;
4151 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4152 if (buf == NULL) {
4153 xmlErrMemory(ctxt, NULL);
4154 ctxt->instate = state;
4155 return;
4156 }
4157 len = 0;
4158 } else if (len + nbchar + 1 >= size) {
4159 xmlChar *new_buf;
4160 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4161 new_buf = (xmlChar *) xmlRealloc(buf,
4162 size * sizeof(xmlChar));
4163 if (new_buf == NULL) {
4164 xmlFree (buf);
4165 xmlErrMemory(ctxt, NULL);
4166 ctxt->instate = state;
4167 return;
4168 }
4169 buf = new_buf;
4170 }
4171 memcpy(&buf[len], ctxt->input->cur, nbchar);
4172 len += nbchar;
4173 buf[len] = 0;
4174 }
4175 }
4176 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004177 if (*in == 0xA) {
4178 in++;
4179 ctxt->input->line++; ctxt->input->col = 1;
4180 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004181 if (*in == 0xD) {
4182 in++;
4183 if (*in == 0xA) {
4184 ctxt->input->cur = in;
4185 in++;
4186 ctxt->input->line++; ctxt->input->col = 1;
4187 continue; /* while */
4188 }
4189 in--;
4190 }
4191 SHRINK;
4192 GROW;
4193 in = ctxt->input->cur;
4194 if (*in == '-') {
4195 if (in[1] == '-') {
4196 if (in[2] == '>') {
4197 SKIP(3);
4198 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4199 (!ctxt->disableSAX)) {
4200 if (buf != NULL)
4201 ctxt->sax->comment(ctxt->userData, buf);
4202 else
4203 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4204 }
4205 if (buf != NULL)
4206 xmlFree(buf);
4207 ctxt->instate = state;
4208 return;
4209 }
4210 if (buf != NULL)
4211 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4212 "Comment not terminated \n<!--%.50s\n",
4213 buf);
4214 else
4215 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4216 "Comment not terminated \n", NULL);
4217 in++;
4218 ctxt->input->col++;
4219 }
4220 in++;
4221 ctxt->input->col++;
4222 goto get_more;
4223 }
4224 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4225 xmlParseCommentComplex(ctxt, buf, len, size);
4226 ctxt->instate = state;
4227 return;
4228}
4229
Owen Taylor3473f882001-02-23 17:55:21 +00004230
4231/**
4232 * xmlParsePITarget:
4233 * @ctxt: an XML parser context
4234 *
4235 * parse the name of a PI
4236 *
4237 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4238 *
4239 * Returns the PITarget name or NULL
4240 */
4241
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004242const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004243xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004244 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004245
4246 name = xmlParseName(ctxt);
4247 if ((name != NULL) &&
4248 ((name[0] == 'x') || (name[0] == 'X')) &&
4249 ((name[1] == 'm') || (name[1] == 'M')) &&
4250 ((name[2] == 'l') || (name[2] == 'L'))) {
4251 int i;
4252 if ((name[0] == 'x') && (name[1] == 'm') &&
4253 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004254 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004255 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004256 return(name);
4257 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004258 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 return(name);
4260 }
4261 for (i = 0;;i++) {
4262 if (xmlW3CPIs[i] == NULL) break;
4263 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4264 return(name);
4265 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004266 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4267 "xmlParsePITarget: invalid name prefix 'xml'\n",
4268 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 }
4270 return(name);
4271}
4272
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form  catalog = "URL"  or  catalog = 'URL'
 * with optional blanks around each part and nothing but blanks after
 * the closing quote. A missing '=' makes the function return silently,
 * any other syntax problem is reported as an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the "catalog" pseudo attribute name */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* the '=' sign, its absence is not reported */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* the quoted value */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;
    for (start = cursor; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4334
Owen Taylor3473f882001-02-23 17:55:21 +00004335/**
4336 * xmlParsePI:
4337 * @ctxt: an XML parser context
4338 *
4339 * parse an XML Processing Instruction.
4340 *
4341 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4342 *
4343 * The processing is transfered to SAX once parsed.
4344 */
4345
4346void
4347xmlParsePI(xmlParserCtxtPtr ctxt) {
4348 xmlChar *buf = NULL;
4349 int len = 0;
4350 int size = XML_PARSER_BUFFER_SIZE;
4351 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004352 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004353 xmlParserInputState state;
4354 int count = 0;
4355
4356 if ((RAW == '<') && (NXT(1) == '?')) {
4357 xmlParserInputPtr input = ctxt->input;
4358 state = ctxt->instate;
4359 ctxt->instate = XML_PARSER_PI;
4360 /*
4361 * this is a Processing Instruction.
4362 */
4363 SKIP(2);
4364 SHRINK;
4365
4366 /*
4367 * Parse the target name and check for special support like
4368 * namespace.
4369 */
4370 target = xmlParsePITarget(ctxt);
4371 if (target != NULL) {
4372 if ((RAW == '?') && (NXT(1) == '>')) {
4373 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004374 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4375 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004376 }
4377 SKIP(2);
4378
4379 /*
4380 * SAX: PI detected.
4381 */
4382 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4383 (ctxt->sax->processingInstruction != NULL))
4384 ctxt->sax->processingInstruction(ctxt->userData,
4385 target, NULL);
4386 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004387 return;
4388 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004389 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004391 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004392 ctxt->instate = state;
4393 return;
4394 }
4395 cur = CUR;
4396 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004397 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4398 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
4400 SKIP_BLANKS;
4401 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004402 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004403 ((cur != '?') || (NXT(1) != '>'))) {
4404 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004405 xmlChar *tmp;
4406
Owen Taylor3473f882001-02-23 17:55:21 +00004407 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004408 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4409 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004410 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004411 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004412 ctxt->instate = state;
4413 return;
4414 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004415 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 }
4417 count++;
4418 if (count > 50) {
4419 GROW;
4420 count = 0;
4421 }
4422 COPY_BUF(l,buf,len,cur);
4423 NEXTL(l);
4424 cur = CUR_CHAR(l);
4425 if (cur == 0) {
4426 SHRINK;
4427 GROW;
4428 cur = CUR_CHAR(l);
4429 }
4430 }
4431 buf[len] = 0;
4432 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004433 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4434 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004435 } else {
4436 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4438 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004439 }
4440 SKIP(2);
4441
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004442#ifdef LIBXML_CATALOG_ENABLED
4443 if (((state == XML_PARSER_MISC) ||
4444 (state == XML_PARSER_START)) &&
4445 (xmlStrEqual(target, XML_CATALOG_PI))) {
4446 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4447 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4448 (allow == XML_CATA_ALLOW_ALL))
4449 xmlParseCatalogPI(ctxt, buf);
4450 }
4451#endif
4452
4453
Owen Taylor3473f882001-02-23 17:55:21 +00004454 /*
4455 * SAX: PI detected.
4456 */
4457 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4458 (ctxt->sax->processingInstruction != NULL))
4459 ctxt->sax->processingInstruction(ctxt->userData,
4460 target, buf);
4461 }
4462 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004463 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004464 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004465 }
4466 ctxt->instate = state;
4467 }
4468}
4469
4470/**
4471 * xmlParseNotationDecl:
4472 * @ctxt: an XML parser context
4473 *
4474 * parse a notation declaration
4475 *
4476 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4477 *
4478 * Hence there is actually 3 choices:
4479 * 'PUBLIC' S PubidLiteral
4480 * 'PUBLIC' S PubidLiteral S SystemLiteral
4481 * and 'SYSTEM' S SystemLiteral
4482 *
4483 * See the NOTE on xmlParseExternalID().
4484 */
4485
4486void
4487xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004488 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004489 xmlChar *Pubid;
4490 xmlChar *Systemid;
4491
Daniel Veillarda07050d2003-10-19 14:46:32 +00004492 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004493 xmlParserInputPtr input = ctxt->input;
4494 SHRINK;
4495 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004496 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004499 return;
4500 }
4501 SKIP_BLANKS;
4502
Daniel Veillard76d66f42001-05-16 21:05:17 +00004503 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004504 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004505 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004506 return;
4507 }
William M. Brack76e95df2003-10-18 16:20:14 +00004508 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004510 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004511 return;
4512 }
4513 SKIP_BLANKS;
4514
4515 /*
4516 * Parse the IDs.
4517 */
4518 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4519 SKIP_BLANKS;
4520
4521 if (RAW == '>') {
4522 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004523 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4524 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004525 }
4526 NEXT;
4527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4528 (ctxt->sax->notationDecl != NULL))
4529 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4530 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004531 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004532 }
Owen Taylor3473f882001-02-23 17:55:21 +00004533 if (Systemid != NULL) xmlFree(Systemid);
4534 if (Pubid != NULL) xmlFree(Pubid);
4535 }
4536}
4537
4538/**
4539 * xmlParseEntityDecl:
4540 * @ctxt: an XML parser context
4541 *
4542 * parse <!ENTITY declarations
4543 *
4544 * [70] EntityDecl ::= GEDecl | PEDecl
4545 *
4546 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4547 *
4548 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4549 *
4550 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4551 *
4552 * [74] PEDef ::= EntityValue | ExternalID
4553 *
4554 * [76] NDataDecl ::= S 'NDATA' S Name
4555 *
4556 * [ VC: Notation Declared ]
4557 * The Name must match the declared name of a notation.
4558 */
4559
4560void
4561xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004562 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 xmlChar *value = NULL;
4564 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004565 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004566 int isParameter = 0;
4567 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004568 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004569
Daniel Veillard4c778d82005-01-23 17:37:44 +00004570 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004571 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004572 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 SHRINK;
4574 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004575 skipped = SKIP_BLANKS;
4576 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4578 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004579 }
Owen Taylor3473f882001-02-23 17:55:21 +00004580
4581 if (RAW == '%') {
4582 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004583 skipped = SKIP_BLANKS;
4584 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4586 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004587 }
Owen Taylor3473f882001-02-23 17:55:21 +00004588 isParameter = 1;
4589 }
4590
Daniel Veillard76d66f42001-05-16 21:05:17 +00004591 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004592 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004593 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4594 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004595 return;
4596 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004597 skipped = SKIP_BLANKS;
4598 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4600 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004601 }
Owen Taylor3473f882001-02-23 17:55:21 +00004602
Daniel Veillardf5582f12002-06-11 10:08:16 +00004603 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004604 /*
4605 * handle the various case of definitions...
4606 */
4607 if (isParameter) {
4608 if ((RAW == '"') || (RAW == '\'')) {
4609 value = xmlParseEntityValue(ctxt, &orig);
4610 if (value) {
4611 if ((ctxt->sax != NULL) &&
4612 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4613 ctxt->sax->entityDecl(ctxt->userData, name,
4614 XML_INTERNAL_PARAMETER_ENTITY,
4615 NULL, NULL, value);
4616 }
4617 } else {
4618 URI = xmlParseExternalID(ctxt, &literal, 1);
4619 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 }
4622 if (URI) {
4623 xmlURIPtr uri;
4624
4625 uri = xmlParseURI((const char *) URI);
4626 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004627 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4628 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004629 /*
4630 * This really ought to be a well formedness error
4631 * but the XML Core WG decided otherwise c.f. issue
4632 * E26 of the XML erratas.
4633 */
Owen Taylor3473f882001-02-23 17:55:21 +00004634 } else {
4635 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004636 /*
4637 * Okay this is foolish to block those but not
4638 * invalid URIs.
4639 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004640 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004641 } else {
4642 if ((ctxt->sax != NULL) &&
4643 (!ctxt->disableSAX) &&
4644 (ctxt->sax->entityDecl != NULL))
4645 ctxt->sax->entityDecl(ctxt->userData, name,
4646 XML_EXTERNAL_PARAMETER_ENTITY,
4647 literal, URI, NULL);
4648 }
4649 xmlFreeURI(uri);
4650 }
4651 }
4652 }
4653 } else {
4654 if ((RAW == '"') || (RAW == '\'')) {
4655 value = xmlParseEntityValue(ctxt, &orig);
4656 if ((ctxt->sax != NULL) &&
4657 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4658 ctxt->sax->entityDecl(ctxt->userData, name,
4659 XML_INTERNAL_GENERAL_ENTITY,
4660 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004661 /*
4662 * For expat compatibility in SAX mode.
4663 */
4664 if ((ctxt->myDoc == NULL) ||
4665 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4666 if (ctxt->myDoc == NULL) {
4667 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004668 if (ctxt->myDoc == NULL) {
4669 xmlErrMemory(ctxt, "New Doc failed");
4670 return;
4671 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004672 }
4673 if (ctxt->myDoc->intSubset == NULL)
4674 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4675 BAD_CAST "fake", NULL, NULL);
4676
Daniel Veillard1af9a412003-08-20 22:54:39 +00004677 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4678 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004679 }
Owen Taylor3473f882001-02-23 17:55:21 +00004680 } else {
4681 URI = xmlParseExternalID(ctxt, &literal, 1);
4682 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004683 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
4685 if (URI) {
4686 xmlURIPtr uri;
4687
4688 uri = xmlParseURI((const char *)URI);
4689 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004690 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4691 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004692 /*
4693 * This really ought to be a well formedness error
4694 * but the XML Core WG decided otherwise c.f. issue
4695 * E26 of the XML errata.
4696 */
Owen Taylor3473f882001-02-23 17:55:21 +00004697 } else {
4698 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004699 /*
4700 * Okay this is foolish to block those but not
4701 * invalid URIs.
4702 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004703 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004704 }
4705 xmlFreeURI(uri);
4706 }
4707 }
William M. Brack76e95df2003-10-18 16:20:14 +00004708 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004709 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4710 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004711 }
4712 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004713 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004714 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004715 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4717 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
4719 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004720 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004721 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4722 (ctxt->sax->unparsedEntityDecl != NULL))
4723 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4724 literal, URI, ndata);
4725 } else {
4726 if ((ctxt->sax != NULL) &&
4727 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4728 ctxt->sax->entityDecl(ctxt->userData, name,
4729 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4730 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004731 /*
4732 * For expat compatibility in SAX mode.
4733 * assuming the entity replacement was asked for
4734 */
4735 if ((ctxt->replaceEntities != 0) &&
4736 ((ctxt->myDoc == NULL) ||
4737 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4738 if (ctxt->myDoc == NULL) {
4739 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004740 if (ctxt->myDoc == NULL) {
4741 xmlErrMemory(ctxt, "New Doc failed");
4742 return;
4743 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004744 }
4745
4746 if (ctxt->myDoc->intSubset == NULL)
4747 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4748 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004749 xmlSAX2EntityDecl(ctxt, name,
4750 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4751 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004752 }
Owen Taylor3473f882001-02-23 17:55:21 +00004753 }
4754 }
4755 }
4756 SKIP_BLANKS;
4757 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004758 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004759 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 } else {
4761 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4763 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004764 }
4765 NEXT;
4766 }
4767 if (orig != NULL) {
4768 /*
4769 * Ugly mechanism to save the raw entity value.
4770 */
4771 xmlEntityPtr cur = NULL;
4772
4773 if (isParameter) {
4774 if ((ctxt->sax != NULL) &&
4775 (ctxt->sax->getParameterEntity != NULL))
4776 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4777 } else {
4778 if ((ctxt->sax != NULL) &&
4779 (ctxt->sax->getEntity != NULL))
4780 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004781 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004782 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004783 }
Owen Taylor3473f882001-02-23 17:55:21 +00004784 }
4785 if (cur != NULL) {
4786 if (cur->orig != NULL)
4787 xmlFree(orig);
4788 else
4789 cur->orig = orig;
4790 } else
4791 xmlFree(orig);
4792 }
Owen Taylor3473f882001-02-23 17:55:21 +00004793 if (value != NULL) xmlFree(value);
4794 if (URI != NULL) xmlFree(URI);
4795 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
4797}
4798
4799/**
4800 * xmlParseDefaultDecl:
4801 * @ctxt: an XML parser context
4802 * @value: Receive a possible fixed default value for the attribute
4803 *
4804 * Parse an attribute default declaration
4805 *
4806 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4807 *
4808 * [ VC: Required Attribute ]
4809 * if the default declaration is the keyword #REQUIRED, then the
4810 * attribute must be specified for all elements of the type in the
4811 * attribute-list declaration.
4812 *
4813 * [ VC: Attribute Default Legal ]
4814 * The declared default value must meet the lexical constraints of
4815 * the declared attribute type c.f. xmlValidateAttributeDecl()
4816 *
4817 * [ VC: Fixed Attribute Default ]
4818 * if an attribute has a default value declared with the #FIXED
4819 * keyword, instances of that attribute must match the default value.
4820 *
4821 * [ WFC: No < in Attribute Values ]
4822 * handled in xmlParseAttValue()
4823 *
4824 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4825 * or XML_ATTRIBUTE_FIXED.
4826 */
4827
4828int
4829xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4830 int val;
4831 xmlChar *ret;
4832
4833 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004834 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004835 SKIP(9);
4836 return(XML_ATTRIBUTE_REQUIRED);
4837 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004838 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004839 SKIP(8);
4840 return(XML_ATTRIBUTE_IMPLIED);
4841 }
4842 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004843 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 SKIP(6);
4845 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 }
4850 SKIP_BLANKS;
4851 }
4852 ret = xmlParseAttValue(ctxt);
4853 ctxt->instate = XML_PARSER_DTD;
4854 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004855 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004856 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 } else
4858 *value = ret;
4859 return(val);
4860}
4861
4862/**
4863 * xmlParseNotationType:
4864 * @ctxt: an XML parser context
4865 *
4866 * parse an Notation attribute type.
4867 *
4868 * Note: the leading 'NOTATION' S part has already being parsed...
4869 *
4870 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4871 *
4872 * [ VC: Notation Attributes ]
4873 * Values of this type must match one of the notation names included
4874 * in the declaration; all notation names in the declaration must be declared.
4875 *
4876 * Returns: the notation attribute tree built while parsing
4877 */
4878
4879xmlEnumerationPtr
4880xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004881 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004882 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4883
4884 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004885 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(NULL);
4887 }
4888 SHRINK;
4889 do {
4890 NEXT;
4891 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004892 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004893 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004894 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4895 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004896 return(ret);
4897 }
4898 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 if (cur == NULL) return(ret);
4900 if (last == NULL) ret = last = cur;
4901 else {
4902 last->next = cur;
4903 last = cur;
4904 }
4905 SKIP_BLANKS;
4906 } while (RAW == '|');
4907 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004908 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004909 if ((last != NULL) && (last != ret))
4910 xmlFreeEnumeration(last);
4911 return(ret);
4912 }
4913 NEXT;
4914 return(ret);
4915}
4916
4917/**
4918 * xmlParseEnumerationType:
4919 * @ctxt: an XML parser context
4920 *
4921 * parse an Enumeration attribute type.
4922 *
4923 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4924 *
4925 * [ VC: Enumeration ]
4926 * Values of this type must match one of the Nmtoken tokens in
4927 * the declaration
4928 *
4929 * Returns: the enumeration attribute tree built while parsing
4930 */
4931
4932xmlEnumerationPtr
4933xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4934 xmlChar *name;
4935 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4936
4937 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004938 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004939 return(NULL);
4940 }
4941 SHRINK;
4942 do {
4943 NEXT;
4944 SKIP_BLANKS;
4945 name = xmlParseNmtoken(ctxt);
4946 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004947 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004948 return(ret);
4949 }
4950 cur = xmlCreateEnumeration(name);
4951 xmlFree(name);
4952 if (cur == NULL) return(ret);
4953 if (last == NULL) ret = last = cur;
4954 else {
4955 last->next = cur;
4956 last = cur;
4957 }
4958 SKIP_BLANKS;
4959 } while (RAW == '|');
4960 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004961 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004962 return(ret);
4963 }
4964 NEXT;
4965 return(ret);
4966}
4967
4968/**
4969 * xmlParseEnumeratedType:
4970 * @ctxt: an XML parser context
4971 * @tree: the enumeration tree built while parsing
4972 *
4973 * parse an Enumerated attribute type.
4974 *
4975 * [57] EnumeratedType ::= NotationType | Enumeration
4976 *
4977 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4978 *
4979 *
4980 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4981 */
4982
4983int
4984xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004985 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004986 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004987 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004988 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4989 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004990 return(0);
4991 }
4992 SKIP_BLANKS;
4993 *tree = xmlParseNotationType(ctxt);
4994 if (*tree == NULL) return(0);
4995 return(XML_ATTRIBUTE_NOTATION);
4996 }
4997 *tree = xmlParseEnumerationType(ctxt);
4998 if (*tree == NULL) return(0);
4999 return(XML_ATTRIBUTE_ENUMERATION);
5000}
5001
5002/**
5003 * xmlParseAttributeType:
5004 * @ctxt: an XML parser context
5005 * @tree: the enumeration tree built while parsing
5006 *
5007 * parse the Attribute list def for an element
5008 *
5009 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5010 *
5011 * [55] StringType ::= 'CDATA'
5012 *
5013 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5014 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5015 *
5016 * Validity constraints for attribute values syntax are checked in
5017 * xmlValidateAttributeValue()
5018 *
5019 * [ VC: ID ]
5020 * Values of type ID must match the Name production. A name must not
5021 * appear more than once in an XML document as a value of this type;
5022 * i.e., ID values must uniquely identify the elements which bear them.
5023 *
5024 * [ VC: One ID per Element Type ]
5025 * No element type may have more than one ID attribute specified.
5026 *
5027 * [ VC: ID Attribute Default ]
5028 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5029 *
5030 * [ VC: IDREF ]
5031 * Values of type IDREF must match the Name production, and values
5032 * of type IDREFS must match Names; each IDREF Name must match the value
5033 * of an ID attribute on some element in the XML document; i.e. IDREF
5034 * values must match the value of some ID attribute.
5035 *
5036 * [ VC: Entity Name ]
5037 * Values of type ENTITY must match the Name production, values
5038 * of type ENTITIES must match Names; each Entity Name must match the
5039 * name of an unparsed entity declared in the DTD.
5040 *
5041 * [ VC: Name Token ]
5042 * Values of type NMTOKEN must match the Nmtoken production; values
5043 * of type NMTOKENS must match Nmtokens.
5044 *
5045 * Returns the attribute type
5046 */
5047int
5048xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5049 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005050 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005051 SKIP(5);
5052 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005053 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005054 SKIP(6);
5055 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005056 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005057 SKIP(5);
5058 return(XML_ATTRIBUTE_IDREF);
5059 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5060 SKIP(2);
5061 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005062 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005063 SKIP(6);
5064 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005065 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005066 SKIP(8);
5067 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005068 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005069 SKIP(8);
5070 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005071 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005072 SKIP(7);
5073 return(XML_ATTRIBUTE_NMTOKEN);
5074 }
5075 return(xmlParseEnumeratedType(ctxt, tree));
5076}
5077
5078/**
5079 * xmlParseAttributeListDecl:
5080 * @ctxt: an XML parser context
5081 *
5082 * : parse the Attribute list def for an element
5083 *
5084 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5085 *
5086 * [53] AttDef ::= S Name S AttType S DefaultDecl
5087 *
5088 */
5089void
5090xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005091 const xmlChar *elemName;
5092 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005093 xmlEnumerationPtr tree;
5094
Daniel Veillarda07050d2003-10-19 14:46:32 +00005095 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005096 xmlParserInputPtr input = ctxt->input;
5097
5098 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005099 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005101 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
5103 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005104 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005105 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005106 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5107 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005108 return;
5109 }
5110 SKIP_BLANKS;
5111 GROW;
5112 while (RAW != '>') {
5113 const xmlChar *check = CUR_PTR;
5114 int type;
5115 int def;
5116 xmlChar *defaultValue = NULL;
5117
5118 GROW;
5119 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005120 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005121 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5123 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005124 break;
5125 }
5126 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005127 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005129 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005130 break;
5131 }
5132 SKIP_BLANKS;
5133
5134 type = xmlParseAttributeType(ctxt, &tree);
5135 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005136 break;
5137 }
5138
5139 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005140 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005141 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5142 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005143 if (tree != NULL)
5144 xmlFreeEnumeration(tree);
5145 break;
5146 }
5147 SKIP_BLANKS;
5148
5149 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5150 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005151 if (defaultValue != NULL)
5152 xmlFree(defaultValue);
5153 if (tree != NULL)
5154 xmlFreeEnumeration(tree);
5155 break;
5156 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005157 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5158 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005159
5160 GROW;
5161 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005162 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005163 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005164 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005165 if (defaultValue != NULL)
5166 xmlFree(defaultValue);
5167 if (tree != NULL)
5168 xmlFreeEnumeration(tree);
5169 break;
5170 }
5171 SKIP_BLANKS;
5172 }
5173 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005174 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5175 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005176 if (defaultValue != NULL)
5177 xmlFree(defaultValue);
5178 if (tree != NULL)
5179 xmlFreeEnumeration(tree);
5180 break;
5181 }
5182 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5183 (ctxt->sax->attributeDecl != NULL))
5184 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5185 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005186 else if (tree != NULL)
5187 xmlFreeEnumeration(tree);
5188
5189 if ((ctxt->sax2) && (defaultValue != NULL) &&
5190 (def != XML_ATTRIBUTE_IMPLIED) &&
5191 (def != XML_ATTRIBUTE_REQUIRED)) {
5192 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5193 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005194 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005195 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5196 }
Owen Taylor3473f882001-02-23 17:55:21 +00005197 if (defaultValue != NULL)
5198 xmlFree(defaultValue);
5199 GROW;
5200 }
5201 if (RAW == '>') {
5202 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005203 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5204 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005205 }
5206 NEXT;
5207 }
Owen Taylor3473f882001-02-23 17:55:21 +00005208 }
5209}
5210
5211/**
5212 * xmlParseElementMixedContentDecl:
5213 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005214 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005215 *
5216 * parse the declaration for a Mixed Element content
5217 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5218 *
5219 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5220 * '(' S? '#PCDATA' S? ')'
5221 *
5222 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5223 *
5224 * [ VC: No Duplicate Types ]
5225 * The same name must not appear more than once in a single
5226 * mixed-content declaration.
5227 *
5228 * returns: the list of the xmlElementContentPtr describing the element choices
5229 */
5230xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005231xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005232 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005233 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005234
5235 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005236 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005237 SKIP(7);
5238 SKIP_BLANKS;
5239 SHRINK;
5240 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005241 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005242 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5243"Element content declaration doesn't start and stop in the same entity\n",
5244 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005245 }
Owen Taylor3473f882001-02-23 17:55:21 +00005246 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005247 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005248 if (ret == NULL)
5249 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 if (RAW == '*') {
5251 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5252 NEXT;
5253 }
5254 return(ret);
5255 }
5256 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005257 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005258 if (ret == NULL) return(NULL);
5259 }
5260 while (RAW == '|') {
5261 NEXT;
5262 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005263 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005264 if (ret == NULL) return(NULL);
5265 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005266 if (cur != NULL)
5267 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 cur = ret;
5269 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005272 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005273 if (n->c1 != NULL)
5274 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005275 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005276 if (n != NULL)
5277 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005278 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005281 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005283 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005284 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005285 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 return(NULL);
5287 }
5288 SKIP_BLANKS;
5289 GROW;
5290 }
5291 if ((RAW == ')') && (NXT(1) == '*')) {
5292 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005293 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005294 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005295 if (cur->c2 != NULL)
5296 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005297 }
5298 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005300 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5301"Element content declaration doesn't start and stop in the same entity\n",
5302 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005303 }
Owen Taylor3473f882001-02-23 17:55:21 +00005304 SKIP(2);
5305 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005306 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 return(NULL);
5309 }
5310
5311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005312 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005313 }
5314 return(ret);
5315}
5316
5317/**
5318 * xmlParseElementChildrenContentDecl:
5319 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005320 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005321 *
5322 * parse the declaration for an Element children content
5323 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5324 *
5325 *
5326 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5327 *
5328 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5329 *
5330 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5331 *
5332 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5333 *
5334 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5335 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005336 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005337 * opening or closing parentheses in a choice, seq, or Mixed
5338 * construct is contained in the replacement text for a parameter
5339 * entity, both must be contained in the same replacement text. For
5340 * interoperability, if a parameter-entity reference appears in a
5341 * choice, seq, or Mixed construct, its replacement text should not
5342 * be empty, and neither the first nor last non-blank character of
5343 * the replacement text should be a connector (| or ,).
5344 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005345 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005346 * hierarchy.
5347 */
5348xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005349xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005350 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005351 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005352 xmlChar type = 0;
5353
5354 SKIP_BLANKS;
5355 GROW;
5356 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005358
Owen Taylor3473f882001-02-23 17:55:21 +00005359 /* Recurse on first child */
5360 NEXT;
5361 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005362 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005363 SKIP_BLANKS;
5364 GROW;
5365 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005366 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005367 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005368 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005369 return(NULL);
5370 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005371 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005372 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005373 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005374 return(NULL);
5375 }
Owen Taylor3473f882001-02-23 17:55:21 +00005376 GROW;
5377 if (RAW == '?') {
5378 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5379 NEXT;
5380 } else if (RAW == '*') {
5381 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5382 NEXT;
5383 } else if (RAW == '+') {
5384 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5385 NEXT;
5386 } else {
5387 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5388 }
Owen Taylor3473f882001-02-23 17:55:21 +00005389 GROW;
5390 }
5391 SKIP_BLANKS;
5392 SHRINK;
5393 while (RAW != ')') {
5394 /*
5395 * Each loop we parse one separator and one element.
5396 */
5397 if (RAW == ',') {
5398 if (type == 0) type = CUR;
5399
5400 /*
5401 * Detect "Name | Name , Name" error
5402 */
5403 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005404 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005405 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005406 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005407 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005408 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005410 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005411 return(NULL);
5412 }
5413 NEXT;
5414
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005415 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005416 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005417 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005418 xmlFreeDocElementContent(ctxt->myDoc, last);
5419 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 return(NULL);
5421 }
5422 if (last == NULL) {
5423 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005424 if (ret != NULL)
5425 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005426 ret = cur = op;
5427 } else {
5428 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005429 if (op != NULL)
5430 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005431 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005432 if (last != NULL)
5433 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005434 cur =op;
5435 last = NULL;
5436 }
5437 } else if (RAW == '|') {
5438 if (type == 0) type = CUR;
5439
5440 /*
5441 * Detect "Name , Name | Name" error
5442 */
5443 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005444 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005445 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005446 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005447 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005448 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005449 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005450 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 return(NULL);
5452 }
5453 NEXT;
5454
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005455 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005457 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005458 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005460 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005461 return(NULL);
5462 }
5463 if (last == NULL) {
5464 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005465 if (ret != NULL)
5466 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005467 ret = cur = op;
5468 } else {
5469 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005470 if (op != NULL)
5471 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005473 if (last != NULL)
5474 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005475 cur =op;
5476 last = NULL;
5477 }
5478 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005479 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005480 if ((last != NULL) && (last != ret))
5481 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005482 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005483 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 return(NULL);
5485 }
5486 GROW;
5487 SKIP_BLANKS;
5488 GROW;
5489 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005490 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005491 /* Recurse on second child */
5492 NEXT;
5493 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005494 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005495 SKIP_BLANKS;
5496 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005497 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005499 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005500 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005501 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005502 return(NULL);
5503 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005504 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005505 if (last == NULL) {
5506 if (ret != NULL)
5507 xmlFreeDocElementContent(ctxt->myDoc, ret);
5508 return(NULL);
5509 }
Owen Taylor3473f882001-02-23 17:55:21 +00005510 if (RAW == '?') {
5511 last->ocur = XML_ELEMENT_CONTENT_OPT;
5512 NEXT;
5513 } else if (RAW == '*') {
5514 last->ocur = XML_ELEMENT_CONTENT_MULT;
5515 NEXT;
5516 } else if (RAW == '+') {
5517 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5518 NEXT;
5519 } else {
5520 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5521 }
5522 }
5523 SKIP_BLANKS;
5524 GROW;
5525 }
5526 if ((cur != NULL) && (last != NULL)) {
5527 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005528 if (last != NULL)
5529 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005530 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005531 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005532 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5533"Element content declaration doesn't start and stop in the same entity\n",
5534 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005535 }
Owen Taylor3473f882001-02-23 17:55:21 +00005536 NEXT;
5537 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005538 if (ret != NULL) {
5539 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5540 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5541 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5542 else
5543 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5544 }
Owen Taylor3473f882001-02-23 17:55:21 +00005545 NEXT;
5546 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005547 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005548 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005549 cur = ret;
5550 /*
5551 * Some normalization:
5552 * (a | b* | c?)* == (a | b | c)*
5553 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005554 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005555 if ((cur->c1 != NULL) &&
5556 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5557 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5558 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5559 if ((cur->c2 != NULL) &&
5560 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5561 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5562 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5563 cur = cur->c2;
5564 }
5565 }
Owen Taylor3473f882001-02-23 17:55:21 +00005566 NEXT;
5567 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005568 if (ret != NULL) {
5569 int found = 0;
5570
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005571 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5572 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005574 else
5575 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005576 /*
5577 * Some normalization:
5578 * (a | b*)+ == (a | b)*
5579 * (a | b?)+ == (a | b)*
5580 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005581 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005582 if ((cur->c1 != NULL) &&
5583 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5584 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5585 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5586 found = 1;
5587 }
5588 if ((cur->c2 != NULL) &&
5589 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5590 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5591 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5592 found = 1;
5593 }
5594 cur = cur->c2;
5595 }
5596 if (found)
5597 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5598 }
Owen Taylor3473f882001-02-23 17:55:21 +00005599 NEXT;
5600 }
5601 return(ret);
5602}
5603
5604/**
5605 * xmlParseElementContentDecl:
5606 * @ctxt: an XML parser context
5607 * @name: the name of the element being defined.
5608 * @result: the Element Content pointer will be stored here if any
5609 *
5610 * parse the declaration for an Element content either Mixed or Children,
5611 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5612 *
5613 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5614 *
5615 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5616 */
5617
5618int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005619xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005620 xmlElementContentPtr *result) {
5621
5622 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005623 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005624 int res;
5625
5626 *result = NULL;
5627
5628 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005629 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005630 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005631 return(-1);
5632 }
5633 NEXT;
5634 GROW;
5635 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005636 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005637 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005638 res = XML_ELEMENT_TYPE_MIXED;
5639 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005640 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005641 res = XML_ELEMENT_TYPE_ELEMENT;
5642 }
Owen Taylor3473f882001-02-23 17:55:21 +00005643 SKIP_BLANKS;
5644 *result = tree;
5645 return(res);
5646}
5647
5648/**
5649 * xmlParseElementDecl:
5650 * @ctxt: an XML parser context
5651 *
5652 * parse an Element declaration.
5653 *
5654 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5655 *
5656 * [ VC: Unique Element Type Declaration ]
5657 * No element type may be declared more than once
5658 *
5659 * Returns the type of the element, or -1 in case of error
5660 */
5661int
5662xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005663 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005664 int ret = -1;
5665 xmlElementContentPtr content = NULL;
5666
Daniel Veillard4c778d82005-01-23 17:37:44 +00005667 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005668 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005669 xmlParserInputPtr input = ctxt->input;
5670
5671 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005672 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005675 }
5676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005677 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005679 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5680 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005681 return(-1);
5682 }
5683 while ((RAW == 0) && (ctxt->inputNr > 1))
5684 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005685 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5687 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005688 }
5689 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(5);
5692 /*
5693 * Element must always be empty.
5694 */
5695 ret = XML_ELEMENT_TYPE_EMPTY;
5696 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5697 (NXT(2) == 'Y')) {
5698 SKIP(3);
5699 /*
5700 * Element is a generic container.
5701 */
5702 ret = XML_ELEMENT_TYPE_ANY;
5703 } else if (RAW == '(') {
5704 ret = xmlParseElementContentDecl(ctxt, name, &content);
5705 } else {
5706 /*
5707 * [ WFC: PEs in Internal Subset ] error handling.
5708 */
5709 if ((RAW == '%') && (ctxt->external == 0) &&
5710 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005711 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005712 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005713 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005714 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005715 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5716 }
Owen Taylor3473f882001-02-23 17:55:21 +00005717 return(-1);
5718 }
5719
5720 SKIP_BLANKS;
5721 /*
5722 * Pop-up of finished entities.
5723 */
5724 while ((RAW == 0) && (ctxt->inputNr > 1))
5725 xmlPopInput(ctxt);
5726 SKIP_BLANKS;
5727
5728 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005729 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005730 if (content != NULL) {
5731 xmlFreeDocElementContent(ctxt->myDoc, content);
5732 }
Owen Taylor3473f882001-02-23 17:55:21 +00005733 } else {
5734 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005735 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5736 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005737 }
5738
5739 NEXT;
5740 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005741 (ctxt->sax->elementDecl != NULL)) {
5742 if (content != NULL)
5743 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5745 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005746 if ((content != NULL) && (content->parent == NULL)) {
5747 /*
5748 * this is a trick: if xmlAddElementDecl is called,
5749 * instead of copying the full tree it is plugged directly
5750 * if called from the parser. Avoid duplicating the
5751 * interfaces or change the API/ABI
5752 */
5753 xmlFreeDocElementContent(ctxt->myDoc, content);
5754 }
5755 } else if (content != NULL) {
5756 xmlFreeDocElementContent(ctxt->myDoc, content);
5757 }
Owen Taylor3473f882001-02-23 17:55:21 +00005758 }
Owen Taylor3473f882001-02-23 17:55:21 +00005759 }
5760 return(ret);
5761}
5762
5763/**
Owen Taylor3473f882001-02-23 17:55:21 +00005764 * xmlParseConditionalSections
5765 * @ctxt: an XML parser context
5766 *
5767 * [61] conditionalSect ::= includeSect | ignoreSect
5768 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5769 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5770 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5771 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5772 */
5773
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005774static void
Owen Taylor3473f882001-02-23 17:55:21 +00005775xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5776 SKIP(3);
5777 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005778 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005779 SKIP(7);
5780 SKIP_BLANKS;
5781 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005782 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 } else {
5784 NEXT;
5785 }
5786 if (xmlParserDebugEntities) {
5787 if ((ctxt->input != NULL) && (ctxt->input->filename))
5788 xmlGenericError(xmlGenericErrorContext,
5789 "%s(%d): ", ctxt->input->filename,
5790 ctxt->input->line);
5791 xmlGenericError(xmlGenericErrorContext,
5792 "Entering INCLUDE Conditional Section\n");
5793 }
5794
5795 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5796 (NXT(2) != '>'))) {
5797 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005798 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005799
5800 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5801 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005802 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005803 NEXT;
5804 } else if (RAW == '%') {
5805 xmlParsePEReference(ctxt);
5806 } else
5807 xmlParseMarkupDecl(ctxt);
5808
5809 /*
5810 * Pop-up of finished entities.
5811 */
5812 while ((RAW == 0) && (ctxt->inputNr > 1))
5813 xmlPopInput(ctxt);
5814
Daniel Veillardfdc91562002-07-01 21:52:03 +00005815 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005816 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005817 break;
5818 }
5819 }
5820 if (xmlParserDebugEntities) {
5821 if ((ctxt->input != NULL) && (ctxt->input->filename))
5822 xmlGenericError(xmlGenericErrorContext,
5823 "%s(%d): ", ctxt->input->filename,
5824 ctxt->input->line);
5825 xmlGenericError(xmlGenericErrorContext,
5826 "Leaving INCLUDE Conditional Section\n");
5827 }
5828
Daniel Veillarda07050d2003-10-19 14:46:32 +00005829 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005830 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005831 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005832 int depth = 0;
5833
5834 SKIP(6);
5835 SKIP_BLANKS;
5836 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005837 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005838 } else {
5839 NEXT;
5840 }
5841 if (xmlParserDebugEntities) {
5842 if ((ctxt->input != NULL) && (ctxt->input->filename))
5843 xmlGenericError(xmlGenericErrorContext,
5844 "%s(%d): ", ctxt->input->filename,
5845 ctxt->input->line);
5846 xmlGenericError(xmlGenericErrorContext,
5847 "Entering IGNORE Conditional Section\n");
5848 }
5849
5850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005851 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005852 * But disable SAX event generating DTD building in the meantime
5853 */
5854 state = ctxt->disableSAX;
5855 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005857 ctxt->instate = XML_PARSER_IGNORE;
5858
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005859 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5861 depth++;
5862 SKIP(3);
5863 continue;
5864 }
5865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5866 if (--depth >= 0) SKIP(3);
5867 continue;
5868 }
5869 NEXT;
5870 continue;
5871 }
5872
5873 ctxt->disableSAX = state;
5874 ctxt->instate = instate;
5875
5876 if (xmlParserDebugEntities) {
5877 if ((ctxt->input != NULL) && (ctxt->input->filename))
5878 xmlGenericError(xmlGenericErrorContext,
5879 "%s(%d): ", ctxt->input->filename,
5880 ctxt->input->line);
5881 xmlGenericError(xmlGenericErrorContext,
5882 "Leaving IGNORE Conditional Section\n");
5883 }
5884
5885 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005887 }
5888
5889 if (RAW == 0)
5890 SHRINK;
5891
5892 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005893 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005894 } else {
5895 SKIP(3);
5896 }
5897}
5898
5899/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005900 * xmlParseMarkupDecl:
5901 * @ctxt: an XML parser context
5902 *
5903 * parse Markup declarations
5904 *
5905 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5906 * NotationDecl | PI | Comment
5907 *
5908 * [ VC: Proper Declaration/PE Nesting ]
5909 * Parameter-entity replacement text must be properly nested with
5910 * markup declarations. That is to say, if either the first character
5911 * or the last character of a markup declaration (markupdecl above) is
5912 * contained in the replacement text for a parameter-entity reference,
5913 * both must be contained in the same replacement text.
5914 *
5915 * [ WFC: PEs in Internal Subset ]
5916 * In the internal DTD subset, parameter-entity references can occur
5917 * only where markup declarations can occur, not within markup declarations.
5918 * (This does not apply to references that occur in external parameter
5919 * entities or to the external subset.)
5920 */
5921void
5922xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5923 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005924 if (CUR == '<') {
5925 if (NXT(1) == '!') {
5926 switch (NXT(2)) {
5927 case 'E':
5928 if (NXT(3) == 'L')
5929 xmlParseElementDecl(ctxt);
5930 else if (NXT(3) == 'N')
5931 xmlParseEntityDecl(ctxt);
5932 break;
5933 case 'A':
5934 xmlParseAttributeListDecl(ctxt);
5935 break;
5936 case 'N':
5937 xmlParseNotationDecl(ctxt);
5938 break;
5939 case '-':
5940 xmlParseComment(ctxt);
5941 break;
5942 default:
5943 /* there is an error but it will be detected later */
5944 break;
5945 }
5946 } else if (NXT(1) == '?') {
5947 xmlParsePI(ctxt);
5948 }
5949 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005950 /*
5951 * This is only for internal subset. On external entities,
5952 * the replacement is done before parsing stage
5953 */
5954 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5955 xmlParsePEReference(ctxt);
5956
5957 /*
5958 * Conditional sections are allowed from entities included
5959 * by PE References in the internal subset.
5960 */
5961 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5962 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5963 xmlParseConditionalSections(ctxt);
5964 }
5965 }
5966
5967 ctxt->instate = XML_PARSER_DTD;
5968}
5969
5970/**
5971 * xmlParseTextDecl:
5972 * @ctxt: an XML parser context
5973 *
5974 * parse an XML declaration header for external entities
5975 *
5976 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5977 *
5978 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5979 */
5980
5981void
5982xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5983 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005984 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005985
5986 /*
5987 * We know that '<?xml' is here.
5988 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005989 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005990 SKIP(5);
5991 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005992 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005993 return;
5994 }
5995
William M. Brack76e95df2003-10-18 16:20:14 +00005996 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5998 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005999 }
6000 SKIP_BLANKS;
6001
6002 /*
6003 * We may have the VersionInfo here.
6004 */
6005 version = xmlParseVersionInfo(ctxt);
6006 if (version == NULL)
6007 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006008 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006009 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6011 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006012 }
6013 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006014 ctxt->input->version = version;
6015
6016 /*
6017 * We must have the encoding declaration
6018 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006019 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6021 /*
6022 * The XML REC instructs us to stop parsing right here
6023 */
6024 return;
6025 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006026 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6027 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6028 "Missing encoding in text declaration\n");
6029 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006030
6031 SKIP_BLANKS;
6032 if ((RAW == '?') && (NXT(1) == '>')) {
6033 SKIP(2);
6034 } else if (RAW == '>') {
6035 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006036 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006037 NEXT;
6038 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006039 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006040 MOVETO_ENDTAG(CUR_PTR);
6041 NEXT;
6042 }
6043}
6044
6045/**
Owen Taylor3473f882001-02-23 17:55:21 +00006046 * xmlParseExternalSubset:
6047 * @ctxt: an XML parser context
6048 * @ExternalID: the external identifier
6049 * @SystemID: the system identifier (or URL)
6050 *
6051 * parse Markup declarations from an external subset
6052 *
6053 * [30] extSubset ::= textDecl? extSubsetDecl
6054 *
6055 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6056 */
6057void
6058xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6059 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006060 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006062
6063 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6064 (ctxt->input->end - ctxt->input->cur >= 4)) {
6065 xmlChar start[4];
6066 xmlCharEncoding enc;
6067
6068 start[0] = RAW;
6069 start[1] = NXT(1);
6070 start[2] = NXT(2);
6071 start[3] = NXT(3);
6072 enc = xmlDetectCharEncoding(start, 4);
6073 if (enc != XML_CHAR_ENCODING_NONE)
6074 xmlSwitchEncoding(ctxt, enc);
6075 }
6076
Daniel Veillarda07050d2003-10-19 14:46:32 +00006077 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006078 xmlParseTextDecl(ctxt);
6079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6080 /*
6081 * The XML REC instructs us to stop parsing right here
6082 */
6083 ctxt->instate = XML_PARSER_EOF;
6084 return;
6085 }
6086 }
6087 if (ctxt->myDoc == NULL) {
6088 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6089 }
6090 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6091 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6092
6093 ctxt->instate = XML_PARSER_DTD;
6094 ctxt->external = 1;
6095 while (((RAW == '<') && (NXT(1) == '?')) ||
6096 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006097 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006098 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006099 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006100
6101 GROW;
6102 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6103 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006104 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006105 NEXT;
6106 } else if (RAW == '%') {
6107 xmlParsePEReference(ctxt);
6108 } else
6109 xmlParseMarkupDecl(ctxt);
6110
6111 /*
6112 * Pop-up of finished entities.
6113 */
6114 while ((RAW == 0) && (ctxt->inputNr > 1))
6115 xmlPopInput(ctxt);
6116
Daniel Veillardfdc91562002-07-01 21:52:03 +00006117 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006118 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006119 break;
6120 }
6121 }
6122
6123 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006124 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006125 }
6126
6127}
6128
6129/**
6130 * xmlParseReference:
6131 * @ctxt: an XML parser context
6132 *
6133 * parse and handle entity references in content, depending on the SAX
6134 * interface, this may end-up in a call to character() if this is a
6135 * CharRef, a predefined entity, if there is no reference() callback.
6136 * or if the parser was asked to switch to that mode.
6137 *
6138 * [67] Reference ::= EntityRef | CharRef
6139 */
6140void
6141xmlParseReference(xmlParserCtxtPtr ctxt) {
6142 xmlEntityPtr ent;
6143 xmlChar *val;
6144 if (RAW != '&') return;
6145
6146 if (NXT(1) == '#') {
6147 int i = 0;
6148 xmlChar out[10];
6149 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006150 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006151
Daniel Veillarddc171602008-03-26 17:41:38 +00006152 if (value == 0)
6153 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006154 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6155 /*
6156 * So we are using non-UTF-8 buffers
6157 * Check that the char fit on 8bits, if not
6158 * generate a CharRef.
6159 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006160 if (value <= 0xFF) {
6161 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006162 out[1] = 0;
6163 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6164 (!ctxt->disableSAX))
6165 ctxt->sax->characters(ctxt->userData, out, 1);
6166 } else {
6167 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006168 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006169 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006170 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006171 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6172 (!ctxt->disableSAX))
6173 ctxt->sax->reference(ctxt->userData, out);
6174 }
6175 } else {
6176 /*
6177 * Just encode the value in UTF-8
6178 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006179 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006180 out[i] = 0;
6181 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6182 (!ctxt->disableSAX))
6183 ctxt->sax->characters(ctxt->userData, out, i);
6184 }
6185 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006186 int was_checked;
6187
Owen Taylor3473f882001-02-23 17:55:21 +00006188 ent = xmlParseEntityRef(ctxt);
6189 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006190 if (!ctxt->wellFormed)
6191 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006192 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006193 if ((ent->name != NULL) &&
6194 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6195 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006196 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006197
6198
6199 /*
6200 * The first reference to the entity trigger a parsing phase
6201 * where the ent->children is filled with the result from
6202 * the parsing.
6203 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006204 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006205 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006206
Owen Taylor3473f882001-02-23 17:55:21 +00006207 value = ent->content;
6208
6209 /*
6210 * Check that this entity is well formed
6211 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006212 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006213 (value[1] == 0) && (value[0] == '<') &&
6214 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6215 /*
6216 * DONE: get definite answer on this !!!
6217 * Lots of entity decls are used to declare a single
6218 * char
6219 * <!ENTITY lt "<">
6220 * Which seems to be valid since
6221 * 2.4: The ampersand character (&) and the left angle
6222 * bracket (<) may appear in their literal form only
6223 * when used ... They are also legal within the literal
6224 * entity value of an internal entity declaration;i
6225 * see "4.3.2 Well-Formed Parsed Entities".
6226 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6227 * Looking at the OASIS test suite and James Clark
6228 * tests, this is broken. However the XML REC uses
6229 * it. Is the XML REC not well-formed ????
6230 * This is a hack to avoid this problem
6231 *
6232 * ANSWER: since lt gt amp .. are already defined,
6233 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006234 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006235 * is lousy but acceptable.
6236 */
6237 list = xmlNewDocText(ctxt->myDoc, value);
6238 if (list != NULL) {
6239 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6240 (ent->children == NULL)) {
6241 ent->children = list;
6242 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006243 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006244 list->parent = (xmlNodePtr) ent;
6245 } else {
6246 xmlFreeNodeList(list);
6247 }
6248 } else if (list != NULL) {
6249 xmlFreeNodeList(list);
6250 }
6251 } else {
6252 /*
6253 * 4.3.2: An internal general parsed entity is well-formed
6254 * if its replacement text matches the production labeled
6255 * content.
6256 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006257
6258 void *user_data;
6259 /*
6260 * This is a bit hackish but this seems the best
6261 * way to make sure both SAX and DOM entity support
6262 * behaves okay.
6263 */
6264 if (ctxt->userData == ctxt)
6265 user_data = NULL;
6266 else
6267 user_data = ctxt->userData;
6268
Owen Taylor3473f882001-02-23 17:55:21 +00006269 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6270 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006271 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6272 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006273 ctxt->depth--;
6274 } else if (ent->etype ==
6275 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6276 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006277 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006278 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006279 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006280 ctxt->depth--;
6281 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006282 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006283 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6284 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006285 }
6286 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006287 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006288 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006289 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006290 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6291 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006292 (ent->children == NULL)) {
6293 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006294 if (ctxt->replaceEntities) {
6295 /*
6296 * Prune it directly in the generated document
6297 * except for single text nodes.
6298 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006299 if (((list->type == XML_TEXT_NODE) &&
6300 (list->next == NULL)) ||
6301 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006302 list->parent = (xmlNodePtr) ent;
6303 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006304 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006305 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006306 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006307 while (list != NULL) {
6308 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006309 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006310 if (list->next == NULL)
6311 ent->last = list;
6312 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006313 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006314 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006315#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006316 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6317 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006318#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006319 }
6320 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006321 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006322 while (list != NULL) {
6323 list->parent = (xmlNodePtr) ent;
6324 if (list->next == NULL)
6325 ent->last = list;
6326 list = list->next;
6327 }
Owen Taylor3473f882001-02-23 17:55:21 +00006328 }
6329 } else {
6330 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006331 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006332 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006333 } else if ((ret != XML_ERR_OK) &&
6334 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006335 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6336 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 } else if (list != NULL) {
6338 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006339 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006340 }
6341 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006342 ent->checked = 1;
6343 }
6344
6345 if (ent->children == NULL) {
6346 /*
6347 * Probably running in SAX mode and the callbacks don't
6348 * build the entity content. So unless we already went
6349 * though parsing for first checking go though the entity
6350 * content to generate callbacks associated to the entity
6351 */
6352 if (was_checked == 1) {
6353 void *user_data;
6354 /*
6355 * This is a bit hackish but this seems the best
6356 * way to make sure both SAX and DOM entity support
6357 * behaves okay.
6358 */
6359 if (ctxt->userData == ctxt)
6360 user_data = NULL;
6361 else
6362 user_data = ctxt->userData;
6363
6364 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6365 ctxt->depth++;
6366 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6367 ent->content, user_data, NULL);
6368 ctxt->depth--;
6369 } else if (ent->etype ==
6370 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6371 ctxt->depth++;
6372 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6373 ctxt->sax, user_data, ctxt->depth,
6374 ent->URI, ent->ExternalID, NULL);
6375 ctxt->depth--;
6376 } else {
6377 ret = XML_ERR_ENTITY_PE_INTERNAL;
6378 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6379 "invalid entity type found\n", NULL);
6380 }
6381 if (ret == XML_ERR_ENTITY_LOOP) {
6382 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6383 return;
6384 }
6385 }
6386 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6387 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6388 /*
6389 * Entity reference callback comes second, it's somewhat
6390 * superfluous but a compatibility to historical behaviour
6391 */
6392 ctxt->sax->reference(ctxt->userData, ent->name);
6393 }
6394 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006395 }
6396 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006397 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006398 /*
6399 * Create a node.
6400 */
6401 ctxt->sax->reference(ctxt->userData, ent->name);
6402 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006403 }
6404 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006405 /*
6406 * There is a problem on the handling of _private for entities
6407 * (bug 155816): Should we copy the content of the field from
6408 * the entity (possibly overwriting some value set by the user
6409 * when a copy is created), should we leave it alone, or should
6410 * we try to take care of different situations? The problem
6411 * is exacerbated by the usage of this field by the xmlReader.
6412 * To fix this bug, we look at _private on the created node
6413 * and, if it's NULL, we copy in whatever was in the entity.
6414 * If it's not NULL we leave it alone. This is somewhat of a
6415 * hack - maybe we should have further tests to determine
6416 * what to do.
6417 */
Owen Taylor3473f882001-02-23 17:55:21 +00006418 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6419 /*
6420 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006421 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006422 * In the first occurrence list contains the replacement.
6423 * progressive == 2 means we are operating on the Reader
6424 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006425 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006426 if (((list == NULL) && (ent->owner == 0)) ||
6427 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006428 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006429
6430 /*
6431 * when operating on a reader, the entities definitions
6432 * are always owning the entities subtree.
6433 if (ctxt->parseMode == XML_PARSE_READER)
6434 ent->owner = 1;
6435 */
6436
Daniel Veillard62f313b2001-07-04 19:49:14 +00006437 cur = ent->children;
6438 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006439 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006440 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006441 if (nw->_private == NULL)
6442 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006443 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006444 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006445 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006446 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006447 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006448 if (cur == ent->last) {
6449 /*
6450 * needed to detect some strange empty
6451 * node cases in the reader tests
6452 */
6453 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006454 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006455 (nw->type == XML_ELEMENT_NODE) &&
6456 (nw->children == NULL))
6457 nw->extra = 1;
6458
Daniel Veillard62f313b2001-07-04 19:49:14 +00006459 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006460 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006461 cur = cur->next;
6462 }
Daniel Veillard81273902003-09-30 00:43:48 +00006463#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006464 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006465 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006466#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006467 } else if (list == NULL) {
6468 xmlNodePtr nw = NULL, cur, next, last,
6469 firstChild = NULL;
6470 /*
6471 * Copy the entity child list and make it the new
6472 * entity child list. The goal is to make sure any
6473 * ID or REF referenced will be the one from the
6474 * document content and not the entity copy.
6475 */
6476 cur = ent->children;
6477 ent->children = NULL;
6478 last = ent->last;
6479 ent->last = NULL;
6480 while (cur != NULL) {
6481 next = cur->next;
6482 cur->next = NULL;
6483 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006484 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006485 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006486 if (nw->_private == NULL)
6487 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006488 if (firstChild == NULL){
6489 firstChild = cur;
6490 }
6491 xmlAddChild((xmlNodePtr) ent, nw);
6492 xmlAddChild(ctxt->node, cur);
6493 }
6494 if (cur == last)
6495 break;
6496 cur = next;
6497 }
6498 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006499#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006500 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6501 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006502#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006503 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006504 const xmlChar *nbktext;
6505
Daniel Veillard62f313b2001-07-04 19:49:14 +00006506 /*
6507 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006508 * node with a possible previous text one which
6509 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006510 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006511 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6512 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006513 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006514 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006515 if ((ent->last != ent->children) &&
6516 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006517 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006518 xmlAddChildList(ctxt->node, ent->children);
6519 }
6520
Owen Taylor3473f882001-02-23 17:55:21 +00006521 /*
6522 * This is to avoid a nasty side effect, see
6523 * characters() in SAX.c
6524 */
6525 ctxt->nodemem = 0;
6526 ctxt->nodelen = 0;
6527 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 }
6529 }
6530 } else {
6531 val = ent->content;
6532 if (val == NULL) return;
6533 /*
6534 * inline the entity.
6535 */
6536 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6537 (!ctxt->disableSAX))
6538 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6539 }
6540 }
6541}
6542
6543/**
6544 * xmlParseEntityRef:
6545 * @ctxt: an XML parser context
6546 *
6547 * parse ENTITY references declarations
6548 *
6549 * [68] EntityRef ::= '&' Name ';'
6550 *
6551 * [ WFC: Entity Declared ]
6552 * In a document without any DTD, a document with only an internal DTD
6553 * subset which contains no parameter entity references, or a document
6554 * with "standalone='yes'", the Name given in the entity reference
6555 * must match that in an entity declaration, except that well-formed
6556 * documents need not declare any of the following entities: amp, lt,
6557 * gt, apos, quot. The declaration of a parameter entity must precede
6558 * any reference to it. Similarly, the declaration of a general entity
6559 * must precede any reference to it which appears in a default value in an
6560 * attribute-list declaration. Note that if entities are declared in the
6561 * external subset or in external parameter entities, a non-validating
6562 * processor is not obligated to read and process their declarations;
6563 * for such documents, the rule that an entity must be declared is a
6564 * well-formedness constraint only if standalone='yes'.
6565 *
6566 * [ WFC: Parsed Entity ]
6567 * An entity reference must not contain the name of an unparsed entity
6568 *
6569 * Returns the xmlEntityPtr if found, or NULL otherwise.
6570 */
6571xmlEntityPtr
6572xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006573 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006574 xmlEntityPtr ent = NULL;
6575
6576 GROW;
6577
6578 if (RAW == '&') {
6579 NEXT;
6580 name = xmlParseName(ctxt);
6581 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006582 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6583 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006584 } else {
6585 if (RAW == ';') {
6586 NEXT;
6587 /*
6588 * Ask first SAX for entity resolution, otherwise try the
6589 * predefined set.
6590 */
6591 if (ctxt->sax != NULL) {
6592 if (ctxt->sax->getEntity != NULL)
6593 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006594 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006595 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006596 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6597 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006598 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006599 }
Owen Taylor3473f882001-02-23 17:55:21 +00006600 }
6601 /*
6602 * [ WFC: Entity Declared ]
6603 * In a document without any DTD, a document with only an
6604 * internal DTD subset which contains no parameter entity
6605 * references, or a document with "standalone='yes'", the
6606 * Name given in the entity reference must match that in an
6607 * entity declaration, except that well-formed documents
6608 * need not declare any of the following entities: amp, lt,
6609 * gt, apos, quot.
6610 * The declaration of a parameter entity must precede any
6611 * reference to it.
6612 * Similarly, the declaration of a general entity must
6613 * precede any reference to it which appears in a default
6614 * value in an attribute-list declaration. Note that if
6615 * entities are declared in the external subset or in
6616 * external parameter entities, a non-validating processor
6617 * is not obligated to read and process their declarations;
6618 * for such documents, the rule that an entity must be
6619 * declared is a well-formedness constraint only if
6620 * standalone='yes'.
6621 */
6622 if (ent == NULL) {
6623 if ((ctxt->standalone == 1) ||
6624 ((ctxt->hasExternalSubset == 0) &&
6625 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006626 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006627 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006628 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006629 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006630 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006631 if ((ctxt->inSubset == 0) &&
6632 (ctxt->sax != NULL) &&
6633 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006634 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006635 }
Owen Taylor3473f882001-02-23 17:55:21 +00006636 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006637 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006638 }
6639
6640 /*
6641 * [ WFC: Parsed Entity ]
6642 * An entity reference must not contain the name of an
6643 * unparsed entity
6644 */
6645 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006646 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006647 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
6649
6650 /*
6651 * [ WFC: No External Entity References ]
6652 * Attribute values cannot contain direct or indirect
6653 * entity references to external entities.
6654 */
6655 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6656 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006657 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6658 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006659 }
6660 /*
6661 * [ WFC: No < in Attribute Values ]
6662 * The replacement text of any entity referred to directly or
6663 * indirectly in an attribute value (other than "&lt;") must
6664 * not contain a <.
6665 */
6666 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6667 (ent != NULL) &&
6668 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6669 (ent->content != NULL) &&
6670 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006671 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006672 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 }
6674
6675 /*
6676 * Internal check, no parameter entities here ...
6677 */
6678 else {
6679 switch (ent->etype) {
6680 case XML_INTERNAL_PARAMETER_ENTITY:
6681 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006682 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6683 "Attempt to reference the parameter entity '%s'\n",
6684 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 break;
6686 default:
6687 break;
6688 }
6689 }
6690
6691 /*
6692 * [ WFC: No Recursion ]
6693 * A parsed entity must not contain a recursive reference
6694 * to itself, either directly or indirectly.
6695 * Done somewhere else
6696 */
6697
6698 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006699 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006700 }
Owen Taylor3473f882001-02-23 17:55:21 +00006701 }
6702 }
6703 return(ent);
6704}
6705
6706/**
6707 * xmlParseStringEntityRef:
6708 * @ctxt: an XML parser context
6709 * @str: a pointer to an index in the string
6710 *
6711 * parse ENTITY references declarations, but this version parses it from
6712 * a string value.
6713 *
6714 * [68] EntityRef ::= '&' Name ';'
6715 *
6716 * [ WFC: Entity Declared ]
6717 * In a document without any DTD, a document with only an internal DTD
6718 * subset which contains no parameter entity references, or a document
6719 * with "standalone='yes'", the Name given in the entity reference
6720 * must match that in an entity declaration, except that well-formed
6721 * documents need not declare any of the following entities: amp, lt,
6722 * gt, apos, quot. The declaration of a parameter entity must precede
6723 * any reference to it. Similarly, the declaration of a general entity
6724 * must precede any reference to it which appears in a default value in an
6725 * attribute-list declaration. Note that if entities are declared in the
6726 * external subset or in external parameter entities, a non-validating
6727 * processor is not obligated to read and process their declarations;
6728 * for such documents, the rule that an entity must be declared is a
6729 * well-formedness constraint only if standalone='yes'.
6730 *
6731 * [ WFC: Parsed Entity ]
6732 * An entity reference must not contain the name of an unparsed entity
6733 *
6734 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6735 * is updated to the current location in the string.
6736 */
6737xmlEntityPtr
6738xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6739 xmlChar *name;
6740 const xmlChar *ptr;
6741 xmlChar cur;
6742 xmlEntityPtr ent = NULL;
6743
6744 if ((str == NULL) || (*str == NULL))
6745 return(NULL);
6746 ptr = *str;
6747 cur = *ptr;
6748 if (cur == '&') {
6749 ptr++;
6750 cur = *ptr;
6751 name = xmlParseStringName(ctxt, &ptr);
6752 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006753 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6754 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006755 } else {
6756 if (*ptr == ';') {
6757 ptr++;
6758 /*
6759 * Ask first SAX for entity resolution, otherwise try the
6760 * predefined set.
6761 */
6762 if (ctxt->sax != NULL) {
6763 if (ctxt->sax->getEntity != NULL)
6764 ent = ctxt->sax->getEntity(ctxt->userData, name);
6765 if (ent == NULL)
6766 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006767 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006768 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006769 }
Owen Taylor3473f882001-02-23 17:55:21 +00006770 }
6771 /*
6772 * [ WFC: Entity Declared ]
6773 * In a document without any DTD, a document with only an
6774 * internal DTD subset which contains no parameter entity
6775 * references, or a document with "standalone='yes'", the
6776 * Name given in the entity reference must match that in an
6777 * entity declaration, except that well-formed documents
6778 * need not declare any of the following entities: amp, lt,
6779 * gt, apos, quot.
6780 * The declaration of a parameter entity must precede any
6781 * reference to it.
6782 * Similarly, the declaration of a general entity must
6783 * precede any reference to it which appears in a default
6784 * value in an attribute-list declaration. Note that if
6785 * entities are declared in the external subset or in
6786 * external parameter entities, a non-validating processor
6787 * is not obligated to read and process their declarations;
6788 * for such documents, the rule that an entity must be
6789 * declared is a well-formedness constraint only if
6790 * standalone='yes'.
6791 */
6792 if (ent == NULL) {
6793 if ((ctxt->standalone == 1) ||
6794 ((ctxt->hasExternalSubset == 0) &&
6795 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006796 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006797 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006798 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006799 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006800 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006801 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006802 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006803 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006804 }
6805
6806 /*
6807 * [ WFC: Parsed Entity ]
6808 * An entity reference must not contain the name of an
6809 * unparsed entity
6810 */
6811 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006812 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006813 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006814 }
6815
6816 /*
6817 * [ WFC: No External Entity References ]
6818 * Attribute values cannot contain direct or indirect
6819 * entity references to external entities.
6820 */
6821 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6822 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006823 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006824 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006825 }
6826 /*
6827 * [ WFC: No < in Attribute Values ]
6828 * The replacement text of any entity referred to directly or
6829 * indirectly in an attribute value (other than "&lt;") must
6830 * not contain a <.
6831 */
6832 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6833 (ent != NULL) &&
6834 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6835 (ent->content != NULL) &&
6836 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006837 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6838 "'<' in entity '%s' is not allowed in attributes values\n",
6839 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006840 }
6841
6842 /*
6843 * Internal check, no parameter entities here ...
6844 */
6845 else {
6846 switch (ent->etype) {
6847 case XML_INTERNAL_PARAMETER_ENTITY:
6848 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006849 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6850 "Attempt to reference the parameter entity '%s'\n",
6851 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006852 break;
6853 default:
6854 break;
6855 }
6856 }
6857
6858 /*
6859 * [ WFC: No Recursion ]
6860 * A parsed entity must not contain a recursive reference
6861 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006862 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006863 */
6864
6865 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006866 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006867 }
6868 xmlFree(name);
6869 }
6870 }
6871 *str = ptr;
6872 return(ent);
6873}
6874
6875/**
6876 * xmlParsePEReference:
6877 * @ctxt: an XML parser context
6878 *
6879 * parse PEReference declarations
6880 * The entity content is handled directly by pushing it's content as
6881 * a new input stream.
6882 *
6883 * [69] PEReference ::= '%' Name ';'
6884 *
6885 * [ WFC: No Recursion ]
6886 * A parsed entity must not contain a recursive
6887 * reference to itself, either directly or indirectly.
6888 *
6889 * [ WFC: Entity Declared ]
6890 * In a document without any DTD, a document with only an internal DTD
6891 * subset which contains no parameter entity references, or a document
6892 * with "standalone='yes'", ... ... The declaration of a parameter
6893 * entity must precede any reference to it...
6894 *
6895 * [ VC: Entity Declared ]
6896 * In a document with an external subset or external parameter entities
6897 * with "standalone='no'", ... ... The declaration of a parameter entity
6898 * must precede any reference to it...
6899 *
6900 * [ WFC: In DTD ]
6901 * Parameter-entity references may only appear in the DTD.
6902 * NOTE: misleading but this is handled.
6903 */
6904void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006905xmlParsePEReference(xmlParserCtxtPtr ctxt)
6906{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006907 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006908 xmlEntityPtr entity = NULL;
6909 xmlParserInputPtr input;
6910
6911 if (RAW == '%') {
6912 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006913 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006914 if (name == NULL) {
6915 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6916 "xmlParsePEReference: no name\n");
6917 } else {
6918 if (RAW == ';') {
6919 NEXT;
6920 if ((ctxt->sax != NULL) &&
6921 (ctxt->sax->getParameterEntity != NULL))
6922 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6923 name);
6924 if (entity == NULL) {
6925 /*
6926 * [ WFC: Entity Declared ]
6927 * In a document without any DTD, a document with only an
6928 * internal DTD subset which contains no parameter entity
6929 * references, or a document with "standalone='yes'", ...
6930 * ... The declaration of a parameter entity must precede
6931 * any reference to it...
6932 */
6933 if ((ctxt->standalone == 1) ||
6934 ((ctxt->hasExternalSubset == 0) &&
6935 (ctxt->hasPErefs == 0))) {
6936 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6937 "PEReference: %%%s; not found\n",
6938 name);
6939 } else {
6940 /*
6941 * [ VC: Entity Declared ]
6942 * In a document with an external subset or external
6943 * parameter entities with "standalone='no'", ...
6944 * ... The declaration of a parameter entity must
6945 * precede any reference to it...
6946 */
6947 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6948 "PEReference: %%%s; not found\n",
6949 name, NULL);
6950 ctxt->valid = 0;
6951 }
6952 } else {
6953 /*
6954 * Internal checking in case the entity quest barfed
6955 */
6956 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6957 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6958 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6959 "Internal: %%%s; is not a parameter entity\n",
6960 name, NULL);
6961 } else if (ctxt->input->free != deallocblankswrapper) {
6962 input =
6963 xmlNewBlanksWrapperInputStream(ctxt, entity);
6964 xmlPushInput(ctxt, input);
6965 } else {
6966 /*
6967 * TODO !!!
6968 * handle the extra spaces added before and after
6969 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6970 */
6971 input = xmlNewEntityInputStream(ctxt, entity);
6972 xmlPushInput(ctxt, input);
6973 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006974 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006975 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006976 xmlParseTextDecl(ctxt);
6977 if (ctxt->errNo ==
6978 XML_ERR_UNSUPPORTED_ENCODING) {
6979 /*
6980 * The XML REC instructs us to stop parsing
6981 * right here
6982 */
6983 ctxt->instate = XML_PARSER_EOF;
6984 return;
6985 }
6986 }
6987 }
6988 }
6989 ctxt->hasPErefs = 1;
6990 } else {
6991 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6992 }
6993 }
Owen Taylor3473f882001-02-23 17:55:21 +00006994 }
6995}
6996
6997/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00006998 * xmlLoadEntityContent:
6999 * @ctxt: an XML parser context
7000 * @entity: an unloaded system entity
7001 *
7002 * Load the original content of the given system entity from the
7003 * ExternalID/SystemID given. This is to be used for Included in Literal
7004 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7005 *
7006 * Returns 0 in case of success and -1 in case of failure
7007 */
7008static int
7009xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7010 xmlParserInputPtr input;
7011 xmlBufferPtr buf;
7012 int l, c;
7013 int count = 0;
7014
7015 if ((ctxt == NULL) || (entity == NULL) ||
7016 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7017 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7018 (entity->content != NULL)) {
7019 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7020 "xmlLoadEntityContent parameter error");
7021 return(-1);
7022 }
7023
7024 if (xmlParserDebugEntities)
7025 xmlGenericError(xmlGenericErrorContext,
7026 "Reading %s entity content input\n", entity->name);
7027
7028 buf = xmlBufferCreate();
7029 if (buf == NULL) {
7030 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7031 "xmlLoadEntityContent parameter error");
7032 return(-1);
7033 }
7034
7035 input = xmlNewEntityInputStream(ctxt, entity);
7036 if (input == NULL) {
7037 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7038 "xmlLoadEntityContent input error");
7039 xmlBufferFree(buf);
7040 return(-1);
7041 }
7042
7043 /*
7044 * Push the entity as the current input, read char by char
7045 * saving to the buffer until the end of the entity or an error
7046 */
7047 xmlPushInput(ctxt, input);
7048 GROW;
7049 c = CUR_CHAR(l);
7050 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7051 (IS_CHAR(c))) {
7052 xmlBufferAdd(buf, ctxt->input->cur, l);
7053 if (count++ > 100) {
7054 count = 0;
7055 GROW;
7056 }
7057 NEXTL(l);
7058 c = CUR_CHAR(l);
7059 }
7060
7061 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7062 xmlPopInput(ctxt);
7063 } else if (!IS_CHAR(c)) {
7064 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7065 "xmlLoadEntityContent: invalid char value %d\n",
7066 c);
7067 xmlBufferFree(buf);
7068 return(-1);
7069 }
7070 entity->content = buf->content;
7071 buf->content = NULL;
7072 xmlBufferFree(buf);
7073
7074 return(0);
7075}
7076
7077/**
Owen Taylor3473f882001-02-23 17:55:21 +00007078 * xmlParseStringPEReference:
7079 * @ctxt: an XML parser context
7080 * @str: a pointer to an index in the string
7081 *
7082 * parse PEReference declarations
7083 *
7084 * [69] PEReference ::= '%' Name ';'
7085 *
7086 * [ WFC: No Recursion ]
7087 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007088 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007089 *
7090 * [ WFC: Entity Declared ]
7091 * In a document without any DTD, a document with only an internal DTD
7092 * subset which contains no parameter entity references, or a document
7093 * with "standalone='yes'", ... ... The declaration of a parameter
7094 * entity must precede any reference to it...
7095 *
7096 * [ VC: Entity Declared ]
7097 * In a document with an external subset or external parameter entities
7098 * with "standalone='no'", ... ... The declaration of a parameter entity
7099 * must precede any reference to it...
7100 *
7101 * [ WFC: In DTD ]
7102 * Parameter-entity references may only appear in the DTD.
7103 * NOTE: misleading but this is handled.
7104 *
7105 * Returns the string of the entity content.
7106 * str is updated to the current value of the index
7107 */
7108xmlEntityPtr
7109xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7110 const xmlChar *ptr;
7111 xmlChar cur;
7112 xmlChar *name;
7113 xmlEntityPtr entity = NULL;
7114
7115 if ((str == NULL) || (*str == NULL)) return(NULL);
7116 ptr = *str;
7117 cur = *ptr;
7118 if (cur == '%') {
7119 ptr++;
7120 cur = *ptr;
7121 name = xmlParseStringName(ctxt, &ptr);
7122 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007123 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7124 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007125 } else {
7126 cur = *ptr;
7127 if (cur == ';') {
7128 ptr++;
7129 cur = *ptr;
7130 if ((ctxt->sax != NULL) &&
7131 (ctxt->sax->getParameterEntity != NULL))
7132 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7133 name);
7134 if (entity == NULL) {
7135 /*
7136 * [ WFC: Entity Declared ]
7137 * In a document without any DTD, a document with only an
7138 * internal DTD subset which contains no parameter entity
7139 * references, or a document with "standalone='yes'", ...
7140 * ... The declaration of a parameter entity must precede
7141 * any reference to it...
7142 */
7143 if ((ctxt->standalone == 1) ||
7144 ((ctxt->hasExternalSubset == 0) &&
7145 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007147 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007148 } else {
7149 /*
7150 * [ VC: Entity Declared ]
7151 * In a document with an external subset or external
7152 * parameter entities with "standalone='no'", ...
7153 * ... The declaration of a parameter entity must
7154 * precede any reference to it...
7155 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7157 "PEReference: %%%s; not found\n",
7158 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007159 ctxt->valid = 0;
7160 }
7161 } else {
7162 /*
7163 * Internal checking in case the entity quest barfed
7164 */
7165 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7166 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007167 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7168 "%%%s; is not a parameter entity\n",
7169 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007170 }
7171 }
7172 ctxt->hasPErefs = 1;
7173 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007174 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007175 }
7176 xmlFree(name);
7177 }
7178 }
7179 *str = ptr;
7180 return(entity);
7181}
7182
7183/**
7184 * xmlParseDocTypeDecl:
7185 * @ctxt: an XML parser context
7186 *
7187 * parse a DOCTYPE declaration
7188 *
7189 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7190 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7191 *
7192 * [ VC: Root Element Type ]
7193 * The Name in the document type declaration must match the element
7194 * type of the root element.
7195 */
7196
7197void
7198xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007199 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007200 xmlChar *ExternalID = NULL;
7201 xmlChar *URI = NULL;
7202
7203 /*
7204 * We know that '<!DOCTYPE' has been detected.
7205 */
7206 SKIP(9);
7207
7208 SKIP_BLANKS;
7209
7210 /*
7211 * Parse the DOCTYPE name.
7212 */
7213 name = xmlParseName(ctxt);
7214 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7216 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007217 }
7218 ctxt->intSubName = name;
7219
7220 SKIP_BLANKS;
7221
7222 /*
7223 * Check for SystemID and ExternalID
7224 */
7225 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7226
7227 if ((URI != NULL) || (ExternalID != NULL)) {
7228 ctxt->hasExternalSubset = 1;
7229 }
7230 ctxt->extSubURI = URI;
7231 ctxt->extSubSystem = ExternalID;
7232
7233 SKIP_BLANKS;
7234
7235 /*
7236 * Create and update the internal subset.
7237 */
7238 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7239 (!ctxt->disableSAX))
7240 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7241
7242 /*
7243 * Is there any internal subset declarations ?
7244 * they are handled separately in xmlParseInternalSubset()
7245 */
7246 if (RAW == '[')
7247 return;
7248
7249 /*
7250 * We should be at the end of the DOCTYPE declaration.
7251 */
7252 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007253 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007254 }
7255 NEXT;
7256}
7257
7258/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007259 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007260 * @ctxt: an XML parser context
7261 *
7262 * parse the internal subset declaration
7263 *
7264 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7265 */
7266
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007267static void
Owen Taylor3473f882001-02-23 17:55:21 +00007268xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7269 /*
7270 * Is there any DTD definition ?
7271 */
7272 if (RAW == '[') {
7273 ctxt->instate = XML_PARSER_DTD;
7274 NEXT;
7275 /*
7276 * Parse the succession of Markup declarations and
7277 * PEReferences.
7278 * Subsequence (markupdecl | PEReference | S)*
7279 */
7280 while (RAW != ']') {
7281 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007282 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007283
7284 SKIP_BLANKS;
7285 xmlParseMarkupDecl(ctxt);
7286 xmlParsePEReference(ctxt);
7287
7288 /*
7289 * Pop-up of finished entities.
7290 */
7291 while ((RAW == 0) && (ctxt->inputNr > 1))
7292 xmlPopInput(ctxt);
7293
7294 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007295 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007296 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007297 break;
7298 }
7299 }
7300 if (RAW == ']') {
7301 NEXT;
7302 SKIP_BLANKS;
7303 }
7304 }
7305
7306 /*
7307 * We should be at the end of the DOCTYPE declaration.
7308 */
7309 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007310 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007311 }
7312 NEXT;
7313}
7314
Daniel Veillard81273902003-09-30 00:43:48 +00007315#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007316/**
7317 * xmlParseAttribute:
7318 * @ctxt: an XML parser context
7319 * @value: a xmlChar ** used to store the value of the attribute
7320 *
7321 * parse an attribute
7322 *
7323 * [41] Attribute ::= Name Eq AttValue
7324 *
7325 * [ WFC: No External Entity References ]
7326 * Attribute values cannot contain direct or indirect entity references
7327 * to external entities.
7328 *
7329 * [ WFC: No < in Attribute Values ]
7330 * The replacement text of any entity referred to directly or indirectly in
7331 * an attribute value (other than "&lt;") must not contain a <.
7332 *
7333 * [ VC: Attribute Value Type ]
7334 * The attribute must have been declared; the value must be of the type
7335 * declared for it.
7336 *
7337 * [25] Eq ::= S? '=' S?
7338 *
7339 * With namespace:
7340 *
7341 * [NS 11] Attribute ::= QName Eq AttValue
7342 *
7343 * Also the case QName == xmlns:??? is handled independently as a namespace
7344 * definition.
7345 *
7346 * Returns the attribute name, and the value in *value.
7347 */
7348
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007349const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007350xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007351 const xmlChar *name;
7352 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007353
7354 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007355 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007356 name = xmlParseName(ctxt);
7357 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007358 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007359 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007360 return(NULL);
7361 }
7362
7363 /*
7364 * read the value
7365 */
7366 SKIP_BLANKS;
7367 if (RAW == '=') {
7368 NEXT;
7369 SKIP_BLANKS;
7370 val = xmlParseAttValue(ctxt);
7371 ctxt->instate = XML_PARSER_CONTENT;
7372 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007373 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007374 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007375 return(NULL);
7376 }
7377
7378 /*
7379 * Check that xml:lang conforms to the specification
7380 * No more registered as an error, just generate a warning now
7381 * since this was deprecated in XML second edition
7382 */
7383 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7384 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007385 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7386 "Malformed value for xml:lang : %s\n",
7387 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007388 }
7389 }
7390
7391 /*
7392 * Check that xml:space conforms to the specification
7393 */
7394 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7395 if (xmlStrEqual(val, BAD_CAST "default"))
7396 *(ctxt->space) = 0;
7397 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7398 *(ctxt->space) = 1;
7399 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007400 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007401"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007402 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007403 }
7404 }
7405
7406 *value = val;
7407 return(name);
7408}
7409
7410/**
7411 * xmlParseStartTag:
7412 * @ctxt: an XML parser context
7413 *
7414 * parse a start of tag either for rule element or
7415 * EmptyElement. In both case we don't parse the tag closing chars.
7416 *
7417 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7418 *
7419 * [ WFC: Unique Att Spec ]
7420 * No attribute name may appear more than once in the same start-tag or
7421 * empty-element tag.
7422 *
7423 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7424 *
7425 * [ WFC: Unique Att Spec ]
7426 * No attribute name may appear more than once in the same start-tag or
7427 * empty-element tag.
7428 *
7429 * With namespace:
7430 *
7431 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7432 *
7433 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7434 *
7435 * Returns the element name parsed
7436 */
7437
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007438const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007439xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007440 const xmlChar *name;
7441 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007442 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007443 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007444 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007445 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007446 int i;
7447
7448 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007449 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007450
7451 name = xmlParseName(ctxt);
7452 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007454 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007455 return(NULL);
7456 }
7457
7458 /*
7459 * Now parse the attributes, it ends up with the ending
7460 *
7461 * (S Attribute)* S?
7462 */
7463 SKIP_BLANKS;
7464 GROW;
7465
Daniel Veillard21a0f912001-02-25 19:54:14 +00007466 while ((RAW != '>') &&
7467 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007468 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007469 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007470 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007471
7472 attname = xmlParseAttribute(ctxt, &attvalue);
7473 if ((attname != NULL) && (attvalue != NULL)) {
7474 /*
7475 * [ WFC: Unique Att Spec ]
7476 * No attribute name may appear more than once in the same
7477 * start-tag or empty-element tag.
7478 */
7479 for (i = 0; i < nbatts;i += 2) {
7480 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007481 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007482 xmlFree(attvalue);
7483 goto failed;
7484 }
7485 }
Owen Taylor3473f882001-02-23 17:55:21 +00007486 /*
7487 * Add the pair to atts
7488 */
7489 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007490 maxatts = 22; /* allow for 10 attrs by default */
7491 atts = (const xmlChar **)
7492 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007493 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007494 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007495 if (attvalue != NULL)
7496 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007497 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007498 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007499 ctxt->atts = atts;
7500 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007501 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007502 const xmlChar **n;
7503
Owen Taylor3473f882001-02-23 17:55:21 +00007504 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007505 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007506 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007507 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007508 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007509 if (attvalue != NULL)
7510 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007511 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007512 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007513 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007514 ctxt->atts = atts;
7515 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007516 }
7517 atts[nbatts++] = attname;
7518 atts[nbatts++] = attvalue;
7519 atts[nbatts] = NULL;
7520 atts[nbatts + 1] = NULL;
7521 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007522 if (attvalue != NULL)
7523 xmlFree(attvalue);
7524 }
7525
7526failed:
7527
Daniel Veillard3772de32002-12-17 10:31:45 +00007528 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007529 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7530 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007531 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7533 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007534 }
7535 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007536 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7537 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7539 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007540 break;
7541 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007542 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007543 GROW;
7544 }
7545
7546 /*
7547 * SAX: Start of Element !
7548 */
7549 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007550 (!ctxt->disableSAX)) {
7551 if (nbatts > 0)
7552 ctxt->sax->startElement(ctxt->userData, name, atts);
7553 else
7554 ctxt->sax->startElement(ctxt->userData, name, NULL);
7555 }
Owen Taylor3473f882001-02-23 17:55:21 +00007556
7557 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007558 /* Free only the content strings */
7559 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007560 if (atts[i] != NULL)
7561 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007562 }
7563 return(name);
7564}
7565
7566/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007567 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007568 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007569 * @line: line of the start tag
7570 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007571 *
7572 * parse an end of tag
7573 *
7574 * [42] ETag ::= '</' Name S? '>'
7575 *
7576 * With namespace
7577 *
7578 * [NS 9] ETag ::= '</' QName S? '>'
7579 */
7580
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007581static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007582xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007583 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007584
7585 GROW;
7586 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007587 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007588 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007589 return;
7590 }
7591 SKIP(2);
7592
Daniel Veillard46de64e2002-05-29 08:21:33 +00007593 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007594
7595 /*
7596 * We should definitely be at the ending "S? '>'" part
7597 */
7598 GROW;
7599 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007600 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007601 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007602 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007603 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007604
7605 /*
7606 * [ WFC: Element Type Match ]
7607 * The Name in an element's end-tag must match the element type in the
7608 * start-tag.
7609 *
7610 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007611 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007612 if (name == NULL) name = BAD_CAST "unparseable";
7613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007614 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007615 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007616 }
7617
7618 /*
7619 * SAX: End of Tag
7620 */
7621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7622 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007623 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007624
Daniel Veillarde57ec792003-09-10 10:50:59 +00007625 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007626 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007627 return;
7628}
7629
7630/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007631 * xmlParseEndTag:
7632 * @ctxt: an XML parser context
7633 *
7634 * parse an end of tag
7635 *
7636 * [42] ETag ::= '</' Name S? '>'
7637 *
7638 * With namespace
7639 *
7640 * [NS 9] ETag ::= '</' QName S? '>'
7641 */
7642
7643void
7644xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007645 xmlParseEndTag1(ctxt, 0);
7646}
Daniel Veillard81273902003-09-30 00:43:48 +00007647#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007648
7649/************************************************************************
7650 * *
7651 * SAX 2 specific operations *
7652 * *
7653 ************************************************************************/
7654
7655static const xmlChar *
7656xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7657 int len = 0, l;
7658 int c;
7659 int count = 0;
7660
7661 /*
7662 * Handler for more complex cases
7663 */
7664 GROW;
7665 c = CUR_CHAR(l);
7666 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007667 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007668 return(NULL);
7669 }
7670
7671 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007672 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007673 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007674 (IS_COMBINING(c)) ||
7675 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007676 if (count++ > 100) {
7677 count = 0;
7678 GROW;
7679 }
7680 len += l;
7681 NEXTL(l);
7682 c = CUR_CHAR(l);
7683 }
7684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7685}
7686
7687/*
7688 * xmlGetNamespace:
7689 * @ctxt: an XML parser context
7690 * @prefix: the prefix to lookup
7691 *
7692 * Lookup the namespace name for the @prefix (which ca be NULL)
7693 * The prefix must come from the @ctxt->dict dictionnary
7694 *
7695 * Returns the namespace name or NULL if not bound
7696 */
7697static const xmlChar *
7698xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7699 int i;
7700
Daniel Veillarde57ec792003-09-10 10:50:59 +00007701 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007703 if (ctxt->nsTab[i] == prefix) {
7704 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7705 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007706 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007707 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 return(NULL);
7709}
7710
7711/**
7712 * xmlParseNCName:
7713 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007714 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715 *
7716 * parse an XML name.
7717 *
7718 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7719 * CombiningChar | Extender
7720 *
7721 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7722 *
7723 * Returns the Name parsed or NULL
7724 */
7725
7726static const xmlChar *
7727xmlParseNCName(xmlParserCtxtPtr ctxt) {
7728 const xmlChar *in;
7729 const xmlChar *ret;
7730 int count = 0;
7731
7732 /*
7733 * Accelerator for simple ASCII names
7734 */
7735 in = ctxt->input->cur;
7736 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7737 ((*in >= 0x41) && (*in <= 0x5A)) ||
7738 (*in == '_')) {
7739 in++;
7740 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7741 ((*in >= 0x41) && (*in <= 0x5A)) ||
7742 ((*in >= 0x30) && (*in <= 0x39)) ||
7743 (*in == '_') || (*in == '-') ||
7744 (*in == '.'))
7745 in++;
7746 if ((*in > 0) && (*in < 0x80)) {
7747 count = in - ctxt->input->cur;
7748 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7749 ctxt->input->cur = in;
7750 ctxt->nbChars += count;
7751 ctxt->input->col += count;
7752 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007753 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007754 }
7755 return(ret);
7756 }
7757 }
7758 return(xmlParseNCNameComplex(ctxt));
7759}
7760
7761/**
7762 * xmlParseQName:
7763 * @ctxt: an XML parser context
7764 * @prefix: pointer to store the prefix part
7765 *
7766 * parse an XML Namespace QName
7767 *
7768 * [6] QName ::= (Prefix ':')? LocalPart
7769 * [7] Prefix ::= NCName
7770 * [8] LocalPart ::= NCName
7771 *
7772 * Returns the Name parsed or NULL
7773 */
7774
7775static const xmlChar *
7776xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7777 const xmlChar *l, *p;
7778
7779 GROW;
7780
7781 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007782 if (l == NULL) {
7783 if (CUR == ':') {
7784 l = xmlParseName(ctxt);
7785 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007786 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7787 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007788 *prefix = NULL;
7789 return(l);
7790 }
7791 }
7792 return(NULL);
7793 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007794 if (CUR == ':') {
7795 NEXT;
7796 p = l;
7797 l = xmlParseNCName(ctxt);
7798 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007799 xmlChar *tmp;
7800
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007801 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7802 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007803 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7804 p = xmlDictLookup(ctxt->dict, tmp, -1);
7805 if (tmp != NULL) xmlFree(tmp);
7806 *prefix = NULL;
7807 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 }
7809 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007810 xmlChar *tmp;
7811
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007812 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7813 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007814 NEXT;
7815 tmp = (xmlChar *) xmlParseName(ctxt);
7816 if (tmp != NULL) {
7817 tmp = xmlBuildQName(tmp, l, NULL, 0);
7818 l = xmlDictLookup(ctxt->dict, tmp, -1);
7819 if (tmp != NULL) xmlFree(tmp);
7820 *prefix = p;
7821 return(l);
7822 }
7823 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7824 l = xmlDictLookup(ctxt->dict, tmp, -1);
7825 if (tmp != NULL) xmlFree(tmp);
7826 *prefix = p;
7827 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007828 }
7829 *prefix = p;
7830 } else
7831 *prefix = NULL;
7832 return(l);
7833}
7834
7835/**
7836 * xmlParseQNameAndCompare:
7837 * @ctxt: an XML parser context
7838 * @name: the localname
7839 * @prefix: the prefix, if any.
7840 *
7841 * parse an XML name and compares for match
7842 * (specialized for endtag parsing)
7843 *
7844 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7845 * and the name for mismatch
7846 */
7847
7848static const xmlChar *
7849xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7850 xmlChar const *prefix) {
7851 const xmlChar *cmp = name;
7852 const xmlChar *in;
7853 const xmlChar *ret;
7854 const xmlChar *prefix2;
7855
7856 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7857
7858 GROW;
7859 in = ctxt->input->cur;
7860
7861 cmp = prefix;
7862 while (*in != 0 && *in == *cmp) {
7863 ++in;
7864 ++cmp;
7865 }
7866 if ((*cmp == 0) && (*in == ':')) {
7867 in++;
7868 cmp = name;
7869 while (*in != 0 && *in == *cmp) {
7870 ++in;
7871 ++cmp;
7872 }
William M. Brack76e95df2003-10-18 16:20:14 +00007873 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007874 /* success */
7875 ctxt->input->cur = in;
7876 return((const xmlChar*) 1);
7877 }
7878 }
7879 /*
7880 * all strings coms from the dictionary, equality can be done directly
7881 */
7882 ret = xmlParseQName (ctxt, &prefix2);
7883 if ((ret == name) && (prefix == prefix2))
7884 return((const xmlChar*) 1);
7885 return ret;
7886}
7887
7888/**
7889 * xmlParseAttValueInternal:
7890 * @ctxt: an XML parser context
7891 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007892 * @alloc: whether the attribute was reallocated as a new string
7893 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007894 *
7895 * parse a value for an attribute.
7896 * NOTE: if no normalization is needed, the routine will return pointers
7897 * directly from the data buffer.
7898 *
7899 * 3.3.3 Attribute-Value Normalization:
7900 * Before the value of an attribute is passed to the application or
7901 * checked for validity, the XML processor must normalize it as follows:
7902 * - a character reference is processed by appending the referenced
7903 * character to the attribute value
7904 * - an entity reference is processed by recursively processing the
7905 * replacement text of the entity
7906 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7907 * appending #x20 to the normalized value, except that only a single
7908 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7909 * parsed entity or the literal entity value of an internal parsed entity
7910 * - other characters are processed by appending them to the normalized value
7911 * If the declared value is not CDATA, then the XML processor must further
7912 * process the normalized attribute value by discarding any leading and
7913 * trailing space (#x20) characters, and by replacing sequences of space
7914 * (#x20) characters by a single space (#x20) character.
7915 * All attributes for which no declaration has been read should be treated
7916 * by a non-validating parser as if declared CDATA.
7917 *
7918 * Returns the AttValue parsed or NULL. The value has to be freed by the
7919 * caller if it was copied, this can be detected by val[*len] == 0.
7920 */
7921
7922static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007923xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7924 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007925{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007926 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007927 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007928 xmlChar *ret = NULL;
7929
7930 GROW;
7931 in = (xmlChar *) CUR_PTR;
7932 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007933 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007934 return (NULL);
7935 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007937
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007938 /*
7939 * try to handle in this routine the most common case where no
7940 * allocation of a new string is required and where content is
7941 * pure ASCII.
7942 */
7943 limit = *in++;
7944 end = ctxt->input->end;
7945 start = in;
7946 if (in >= end) {
7947 const xmlChar *oldbase = ctxt->input->base;
7948 GROW;
7949 if (oldbase != ctxt->input->base) {
7950 long delta = ctxt->input->base - oldbase;
7951 start = start + delta;
7952 in = in + delta;
7953 }
7954 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007955 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007956 if (normalize) {
7957 /*
7958 * Skip any leading spaces
7959 */
7960 while ((in < end) && (*in != limit) &&
7961 ((*in == 0x20) || (*in == 0x9) ||
7962 (*in == 0xA) || (*in == 0xD))) {
7963 in++;
7964 start = in;
7965 if (in >= end) {
7966 const xmlChar *oldbase = ctxt->input->base;
7967 GROW;
7968 if (oldbase != ctxt->input->base) {
7969 long delta = ctxt->input->base - oldbase;
7970 start = start + delta;
7971 in = in + delta;
7972 }
7973 end = ctxt->input->end;
7974 }
7975 }
7976 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7977 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7978 if ((*in++ == 0x20) && (*in == 0x20)) break;
7979 if (in >= end) {
7980 const xmlChar *oldbase = ctxt->input->base;
7981 GROW;
7982 if (oldbase != ctxt->input->base) {
7983 long delta = ctxt->input->base - oldbase;
7984 start = start + delta;
7985 in = in + delta;
7986 }
7987 end = ctxt->input->end;
7988 }
7989 }
7990 last = in;
7991 /*
7992 * skip the trailing blanks
7993 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007994 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007995 while ((in < end) && (*in != limit) &&
7996 ((*in == 0x20) || (*in == 0x9) ||
7997 (*in == 0xA) || (*in == 0xD))) {
7998 in++;
7999 if (in >= end) {
8000 const xmlChar *oldbase = ctxt->input->base;
8001 GROW;
8002 if (oldbase != ctxt->input->base) {
8003 long delta = ctxt->input->base - oldbase;
8004 start = start + delta;
8005 in = in + delta;
8006 last = last + delta;
8007 }
8008 end = ctxt->input->end;
8009 }
8010 }
8011 if (*in != limit) goto need_complex;
8012 } else {
8013 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8014 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8015 in++;
8016 if (in >= end) {
8017 const xmlChar *oldbase = ctxt->input->base;
8018 GROW;
8019 if (oldbase != ctxt->input->base) {
8020 long delta = ctxt->input->base - oldbase;
8021 start = start + delta;
8022 in = in + delta;
8023 }
8024 end = ctxt->input->end;
8025 }
8026 }
8027 last = in;
8028 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008029 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008030 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008031 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008032 *len = last - start;
8033 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008035 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008036 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008037 }
8038 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008039 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008040 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008041need_complex:
8042 if (alloc) *alloc = 1;
8043 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008044}
8045
8046/**
8047 * xmlParseAttribute2:
8048 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008049 * @pref: the element prefix
8050 * @elem: the element name
8051 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008052 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008053 * @len: an int * to save the length of the attribute
8054 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008055 *
8056 * parse an attribute in the new SAX2 framework.
8057 *
8058 * Returns the attribute name, and the value in *value, .
8059 */
8060
8061static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008062xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008063 const xmlChar * pref, const xmlChar * elem,
8064 const xmlChar ** prefix, xmlChar ** value,
8065 int *len, int *alloc)
8066{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008067 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008068 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008069 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008070
8071 *value = NULL;
8072 GROW;
8073 name = xmlParseQName(ctxt, prefix);
8074 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008075 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8076 "error parsing attribute name\n");
8077 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078 }
8079
8080 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008081 * get the type if needed
8082 */
8083 if (ctxt->attsSpecial != NULL) {
8084 int type;
8085
8086 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008087 pref, elem, *prefix, name);
8088 if (type != 0)
8089 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008090 }
8091
8092 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008093 * read the value
8094 */
8095 SKIP_BLANKS;
8096 if (RAW == '=') {
8097 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008098 SKIP_BLANKS;
8099 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8100 if (normalize) {
8101 /*
8102 * Sometimes a second normalisation pass for spaces is needed
8103 * but that only happens if charrefs or entities refernces
8104 * have been used in the attribute value, i.e. the attribute
8105 * value have been extracted in an allocated string already.
8106 */
8107 if (*alloc) {
8108 const xmlChar *val2;
8109
8110 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8111 if (val2 != NULL) {
8112 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008113 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008114 }
8115 }
8116 }
8117 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008118 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008119 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8120 "Specification mandate value for attribute %s\n",
8121 name);
8122 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008123 }
8124
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008125 if (*prefix == ctxt->str_xml) {
8126 /*
8127 * Check that xml:lang conforms to the specification
8128 * No more registered as an error, just generate a warning now
8129 * since this was deprecated in XML second edition
8130 */
8131 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8132 internal_val = xmlStrndup(val, *len);
8133 if (!xmlCheckLanguageID(internal_val)) {
8134 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8135 "Malformed value for xml:lang : %s\n",
8136 internal_val, NULL);
8137 }
8138 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008139
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008140 /*
8141 * Check that xml:space conforms to the specification
8142 */
8143 if (xmlStrEqual(name, BAD_CAST "space")) {
8144 internal_val = xmlStrndup(val, *len);
8145 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8146 *(ctxt->space) = 0;
8147 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8148 *(ctxt->space) = 1;
8149 else {
8150 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8151 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8152 internal_val, NULL);
8153 }
8154 }
8155 if (internal_val) {
8156 xmlFree(internal_val);
8157 }
8158 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008159
8160 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008161 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008162}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008163/**
8164 * xmlParseStartTag2:
8165 * @ctxt: an XML parser context
8166 *
8167 * parse a start of tag either for rule element or
8168 * EmptyElement. In both case we don't parse the tag closing chars.
8169 * This routine is called when running SAX2 parsing
8170 *
8171 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8172 *
8173 * [ WFC: Unique Att Spec ]
8174 * No attribute name may appear more than once in the same start-tag or
8175 * empty-element tag.
8176 *
8177 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8178 *
8179 * [ WFC: Unique Att Spec ]
8180 * No attribute name may appear more than once in the same start-tag or
8181 * empty-element tag.
8182 *
8183 * With namespace:
8184 *
8185 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8186 *
8187 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8188 *
8189 * Returns the element name parsed
8190 */
8191
8192static const xmlChar *
8193xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008194 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008195 const xmlChar *localname;
8196 const xmlChar *prefix;
8197 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008198 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008199 const xmlChar *nsname;
8200 xmlChar *attvalue;
8201 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008202 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008203 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008204 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008205 const xmlChar *base;
8206 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008207 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008208
8209 if (RAW != '<') return(NULL);
8210 NEXT1;
8211
8212 /*
8213 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8214 * point since the attribute values may be stored as pointers to
8215 * the buffer and calling SHRINK would destroy them !
8216 * The Shrinking is only possible once the full set of attribute
8217 * callbacks have been done.
8218 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008219reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008220 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008221 base = ctxt->input->base;
8222 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008223 oldline = ctxt->input->line;
8224 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008225 nbatts = 0;
8226 nratts = 0;
8227 nbdef = 0;
8228 nbNs = 0;
8229 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008230 /* Forget any namespaces added during an earlier parse of this element. */
8231 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008232
8233 localname = xmlParseQName(ctxt, &prefix);
8234 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008235 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8236 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008237 return(NULL);
8238 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008239 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008240
8241 /*
8242 * Now parse the attributes, it ends up with the ending
8243 *
8244 * (S Attribute)* S?
8245 */
8246 SKIP_BLANKS;
8247 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008248 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008249
8250 while ((RAW != '>') &&
8251 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008252 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008253 const xmlChar *q = CUR_PTR;
8254 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008255 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008256
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008257 attname = xmlParseAttribute2(ctxt, prefix, localname,
8258 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008259 if (ctxt->input->base != base) {
8260 if ((attvalue != NULL) && (alloc != 0))
8261 xmlFree(attvalue);
8262 attvalue = NULL;
8263 goto base_changed;
8264 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008265 if ((attname != NULL) && (attvalue != NULL)) {
8266 if (len < 0) len = xmlStrlen(attvalue);
8267 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008268 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8269 xmlURIPtr uri;
8270
8271 if (*URL != 0) {
8272 uri = xmlParseURI((const char *) URL);
8273 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008274 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8275 "xmlns: %s not a valid URI\n",
8276 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008277 } else {
8278 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008279 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8280 "xmlns: URI %s is not absolute\n",
8281 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008282 }
8283 xmlFreeURI(uri);
8284 }
8285 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008286 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008287 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008289 for (j = 1;j <= nbNs;j++)
8290 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8291 break;
8292 if (j <= nbNs)
8293 xmlErrAttributeDup(ctxt, NULL, attname);
8294 else
8295 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008296 if (alloc != 0) xmlFree(attvalue);
8297 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008298 continue;
8299 }
8300 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008301 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8302 xmlURIPtr uri;
8303
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008304 if (attname == ctxt->str_xml) {
8305 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008306 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8307 "xml namespace prefix mapped to wrong URI\n",
8308 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008309 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008310 /*
8311 * Do not keep a namespace definition node
8312 */
8313 if (alloc != 0) xmlFree(attvalue);
8314 SKIP_BLANKS;
8315 continue;
8316 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008317 uri = xmlParseURI((const char *) URL);
8318 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008319 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8320 "xmlns:%s: '%s' is not a valid URI\n",
8321 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008322 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008323 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008324 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8325 "xmlns:%s: URI %s is not absolute\n",
8326 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008327 }
8328 xmlFreeURI(uri);
8329 }
8330
Daniel Veillard0fb18932003-09-07 09:14:37 +00008331 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008332 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008333 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008334 for (j = 1;j <= nbNs;j++)
8335 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8336 break;
8337 if (j <= nbNs)
8338 xmlErrAttributeDup(ctxt, aprefix, attname);
8339 else
8340 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008341 if (alloc != 0) xmlFree(attvalue);
8342 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008343 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008344 continue;
8345 }
8346
8347 /*
8348 * Add the pair to atts
8349 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008350 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8351 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008352 if (attvalue[len] == 0)
8353 xmlFree(attvalue);
8354 goto failed;
8355 }
8356 maxatts = ctxt->maxatts;
8357 atts = ctxt->atts;
8358 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008359 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008360 atts[nbatts++] = attname;
8361 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008362 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008363 atts[nbatts++] = attvalue;
8364 attvalue += len;
8365 atts[nbatts++] = attvalue;
8366 /*
8367 * tag if some deallocation is needed
8368 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008369 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008370 } else {
8371 if ((attvalue != NULL) && (attvalue[len] == 0))
8372 xmlFree(attvalue);
8373 }
8374
8375failed:
8376
8377 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008378 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008379 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8380 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008381 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008382 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8383 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008384 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008385 }
8386 SKIP_BLANKS;
8387 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8388 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008389 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008390 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008391 break;
8392 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008393 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008394 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008395 }
8396
Daniel Veillard0fb18932003-09-07 09:14:37 +00008397 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008398 * The attributes defaulting
8399 */
8400 if (ctxt->attsDefault != NULL) {
8401 xmlDefAttrsPtr defaults;
8402
8403 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8404 if (defaults != NULL) {
8405 for (i = 0;i < defaults->nbAttrs;i++) {
8406 attname = defaults->values[4 * i];
8407 aprefix = defaults->values[4 * i + 1];
8408
8409 /*
8410 * special work for namespaces defaulted defs
8411 */
8412 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8413 /*
8414 * check that it's not a defined namespace
8415 */
8416 for (j = 1;j <= nbNs;j++)
8417 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8418 break;
8419 if (j <= nbNs) continue;
8420
8421 nsname = xmlGetNamespace(ctxt, NULL);
8422 if (nsname != defaults->values[4 * i + 2]) {
8423 if (nsPush(ctxt, NULL,
8424 defaults->values[4 * i + 2]) > 0)
8425 nbNs++;
8426 }
8427 } else if (aprefix == ctxt->str_xmlns) {
8428 /*
8429 * check that it's not a defined namespace
8430 */
8431 for (j = 1;j <= nbNs;j++)
8432 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8433 break;
8434 if (j <= nbNs) continue;
8435
8436 nsname = xmlGetNamespace(ctxt, attname);
8437 if (nsname != defaults->values[2]) {
8438 if (nsPush(ctxt, attname,
8439 defaults->values[4 * i + 2]) > 0)
8440 nbNs++;
8441 }
8442 } else {
8443 /*
8444 * check that it's not a defined attribute
8445 */
8446 for (j = 0;j < nbatts;j+=5) {
8447 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8448 break;
8449 }
8450 if (j < nbatts) continue;
8451
8452 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8453 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008454 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008455 }
8456 maxatts = ctxt->maxatts;
8457 atts = ctxt->atts;
8458 }
8459 atts[nbatts++] = attname;
8460 atts[nbatts++] = aprefix;
8461 if (aprefix == NULL)
8462 atts[nbatts++] = NULL;
8463 else
8464 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8465 atts[nbatts++] = defaults->values[4 * i + 2];
8466 atts[nbatts++] = defaults->values[4 * i + 3];
8467 nbdef++;
8468 }
8469 }
8470 }
8471 }
8472
Daniel Veillarde70c8772003-11-25 07:21:18 +00008473 /*
8474 * The attributes checkings
8475 */
8476 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008477 /*
8478 * The default namespace does not apply to attribute names.
8479 */
8480 if (atts[i + 1] != NULL) {
8481 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8482 if (nsname == NULL) {
8483 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8484 "Namespace prefix %s for %s on %s is not defined\n",
8485 atts[i + 1], atts[i], localname);
8486 }
8487 atts[i + 2] = nsname;
8488 } else
8489 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008490 /*
8491 * [ WFC: Unique Att Spec ]
8492 * No attribute name may appear more than once in the same
8493 * start-tag or empty-element tag.
8494 * As extended by the Namespace in XML REC.
8495 */
8496 for (j = 0; j < i;j += 5) {
8497 if (atts[i] == atts[j]) {
8498 if (atts[i+1] == atts[j+1]) {
8499 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8500 break;
8501 }
8502 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8503 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8504 "Namespaced Attribute %s in '%s' redefined\n",
8505 atts[i], nsname, NULL);
8506 break;
8507 }
8508 }
8509 }
8510 }
8511
Daniel Veillarde57ec792003-09-10 10:50:59 +00008512 nsname = xmlGetNamespace(ctxt, prefix);
8513 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008514 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8515 "Namespace prefix %s on %s is not defined\n",
8516 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008517 }
8518 *pref = prefix;
8519 *URI = nsname;
8520
8521 /*
8522 * SAX: Start of Element !
8523 */
8524 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8525 (!ctxt->disableSAX)) {
8526 if (nbNs > 0)
8527 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8528 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8529 nbatts / 5, nbdef, atts);
8530 else
8531 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8532 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8533 }
8534
8535 /*
8536 * Free up attribute allocated strings if needed
8537 */
8538 if (attval != 0) {
8539 for (i = 3,j = 0; j < nratts;i += 5,j++)
8540 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8541 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008542 }
8543
8544 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008545
8546base_changed:
8547 /*
8548 * the attribute strings are valid iif the base didn't changed
8549 */
8550 if (attval != 0) {
8551 for (i = 3,j = 0; j < nratts;i += 5,j++)
8552 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8553 xmlFree((xmlChar *) atts[i]);
8554 }
8555 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008556 ctxt->input->line = oldline;
8557 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008558 if (ctxt->wellFormed == 1) {
8559 goto reparse;
8560 }
8561 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562}
8563
8564/**
8565 * xmlParseEndTag2:
8566 * @ctxt: an XML parser context
8567 * @line: line of the start tag
8568 * @nsNr: number of namespaces on the start tag
8569 *
8570 * parse an end of tag
8571 *
8572 * [42] ETag ::= '</' Name S? '>'
8573 *
8574 * With namespace
8575 *
8576 * [NS 9] ETag ::= '</' QName S? '>'
8577 */
8578
8579static void
8580xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008581 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582 const xmlChar *name;
8583
8584 GROW;
8585 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008586 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008587 return;
8588 }
8589 SKIP(2);
8590
William M. Brack13dfa872004-09-18 04:52:08 +00008591 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008592 if (ctxt->input->cur[tlen] == '>') {
8593 ctxt->input->cur += tlen + 1;
8594 goto done;
8595 }
8596 ctxt->input->cur += tlen;
8597 name = (xmlChar*)1;
8598 } else {
8599 if (prefix == NULL)
8600 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8601 else
8602 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8603 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008604
8605 /*
8606 * We should definitely be at the ending "S? '>'" part
8607 */
8608 GROW;
8609 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008610 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008611 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008612 } else
8613 NEXT1;
8614
8615 /*
8616 * [ WFC: Element Type Match ]
8617 * The Name in an element's end-tag must match the element type in the
8618 * start-tag.
8619 *
8620 */
8621 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008622 if (name == NULL) name = BAD_CAST "unparseable";
8623 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008624 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008625 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008626 }
8627
8628 /*
8629 * SAX: End of Tag
8630 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008631done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008632 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8633 (!ctxt->disableSAX))
8634 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8635
Daniel Veillard0fb18932003-09-07 09:14:37 +00008636 spacePop(ctxt);
8637 if (nsNr != 0)
8638 nsPop(ctxt, nsNr);
8639 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008640}
8641
8642/**
Owen Taylor3473f882001-02-23 17:55:21 +00008643 * xmlParseCDSect:
8644 * @ctxt: an XML parser context
8645 *
8646 * Parse escaped pure raw content.
8647 *
8648 * [18] CDSect ::= CDStart CData CDEnd
8649 *
8650 * [19] CDStart ::= '<![CDATA['
8651 *
8652 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8653 *
8654 * [21] CDEnd ::= ']]>'
8655 */
8656void
8657xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8658 xmlChar *buf = NULL;
8659 int len = 0;
8660 int size = XML_PARSER_BUFFER_SIZE;
8661 int r, rl;
8662 int s, sl;
8663 int cur, l;
8664 int count = 0;
8665
Daniel Veillard8f597c32003-10-06 08:19:27 +00008666 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008667 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008668 SKIP(9);
8669 } else
8670 return;
8671
8672 ctxt->instate = XML_PARSER_CDATA_SECTION;
8673 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008674 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008675 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008676 ctxt->instate = XML_PARSER_CONTENT;
8677 return;
8678 }
8679 NEXTL(rl);
8680 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008681 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008682 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008683 ctxt->instate = XML_PARSER_CONTENT;
8684 return;
8685 }
8686 NEXTL(sl);
8687 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008688 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008689 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008690 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008691 return;
8692 }
William M. Brack871611b2003-10-18 04:53:14 +00008693 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008694 ((r != ']') || (s != ']') || (cur != '>'))) {
8695 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008696 xmlChar *tmp;
8697
Owen Taylor3473f882001-02-23 17:55:21 +00008698 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008699 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8700 if (tmp == NULL) {
8701 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008702 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008703 return;
8704 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008705 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008706 }
8707 COPY_BUF(rl,buf,len,r);
8708 r = s;
8709 rl = sl;
8710 s = cur;
8711 sl = l;
8712 count++;
8713 if (count > 50) {
8714 GROW;
8715 count = 0;
8716 }
8717 NEXTL(l);
8718 cur = CUR_CHAR(l);
8719 }
8720 buf[len] = 0;
8721 ctxt->instate = XML_PARSER_CONTENT;
8722 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008723 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008724 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008725 xmlFree(buf);
8726 return;
8727 }
8728 NEXTL(l);
8729
8730 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008731 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008732 */
8733 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8734 if (ctxt->sax->cdataBlock != NULL)
8735 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008736 else if (ctxt->sax->characters != NULL)
8737 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008738 }
8739 xmlFree(buf);
8740}
8741
8742/**
8743 * xmlParseContent:
8744 * @ctxt: an XML parser context
8745 *
8746 * Parse a content:
8747 *
8748 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8749 */
8750
8751void
8752xmlParseContent(xmlParserCtxtPtr ctxt) {
8753 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008754 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008755 ((RAW != '<') || (NXT(1) != '/')) &&
8756 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008757 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008758 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008759 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008760
8761 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008762 * First case : a Processing Instruction.
8763 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008764 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008765 xmlParsePI(ctxt);
8766 }
8767
8768 /*
8769 * Second case : a CDSection
8770 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008771 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008772 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008773 xmlParseCDSect(ctxt);
8774 }
8775
8776 /*
8777 * Third case : a comment
8778 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008779 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008780 (NXT(2) == '-') && (NXT(3) == '-')) {
8781 xmlParseComment(ctxt);
8782 ctxt->instate = XML_PARSER_CONTENT;
8783 }
8784
8785 /*
8786 * Fourth case : a sub-element.
8787 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008788 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008789 xmlParseElement(ctxt);
8790 }
8791
8792 /*
8793 * Fifth case : a reference. If if has not been resolved,
8794 * parsing returns it's Name, create the node
8795 */
8796
Daniel Veillard21a0f912001-02-25 19:54:14 +00008797 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008798 xmlParseReference(ctxt);
8799 }
8800
8801 /*
8802 * Last case, text. Note that References are handled directly.
8803 */
8804 else {
8805 xmlParseCharData(ctxt, 0);
8806 }
8807
8808 GROW;
8809 /*
8810 * Pop-up of finished entities.
8811 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008812 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008813 xmlPopInput(ctxt);
8814 SHRINK;
8815
Daniel Veillardfdc91562002-07-01 21:52:03 +00008816 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008817 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8818 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008819 ctxt->instate = XML_PARSER_EOF;
8820 break;
8821 }
8822 }
8823}
8824
8825/**
8826 * xmlParseElement:
8827 * @ctxt: an XML parser context
8828 *
8829 * parse an XML element, this is highly recursive
8830 *
8831 * [39] element ::= EmptyElemTag | STag content ETag
8832 *
8833 * [ WFC: Element Type Match ]
8834 * The Name in an element's end-tag must match the element type in the
8835 * start-tag.
8836 *
Owen Taylor3473f882001-02-23 17:55:21 +00008837 */
8838
8839void
8840xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008841 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008842 const xmlChar *prefix;
8843 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008844 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008845 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008846 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008847 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008848
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008849 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8850 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8851 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8852 xmlParserMaxDepth);
8853 ctxt->instate = XML_PARSER_EOF;
8854 return;
8855 }
8856
Owen Taylor3473f882001-02-23 17:55:21 +00008857 /* Capture start position */
8858 if (ctxt->record_info) {
8859 node_info.begin_pos = ctxt->input->consumed +
8860 (CUR_PTR - ctxt->input->base);
8861 node_info.begin_line = ctxt->input->line;
8862 }
8863
8864 if (ctxt->spaceNr == 0)
8865 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008866 else if (*ctxt->space == -2)
8867 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008868 else
8869 spacePush(ctxt, *ctxt->space);
8870
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008871 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008872#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008873 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008874#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008875 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008876#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877 else
8878 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008879#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008880 if (name == NULL) {
8881 spacePop(ctxt);
8882 return;
8883 }
8884 namePush(ctxt, name);
8885 ret = ctxt->node;
8886
Daniel Veillard4432df22003-09-28 18:58:27 +00008887#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008888 /*
8889 * [ VC: Root Element Type ]
8890 * The Name in the document type declaration must match the element
8891 * type of the root element.
8892 */
8893 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8894 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8895 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008896#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008897
8898 /*
8899 * Check for an Empty Element.
8900 */
8901 if ((RAW == '/') && (NXT(1) == '>')) {
8902 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008903 if (ctxt->sax2) {
8904 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8905 (!ctxt->disableSAX))
8906 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008907#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 } else {
8909 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8910 (!ctxt->disableSAX))
8911 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008912#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008913 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008914 namePop(ctxt);
8915 spacePop(ctxt);
8916 if (nsNr != ctxt->nsNr)
8917 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008918 if ( ret != NULL && ctxt->record_info ) {
8919 node_info.end_pos = ctxt->input->consumed +
8920 (CUR_PTR - ctxt->input->base);
8921 node_info.end_line = ctxt->input->line;
8922 node_info.node = ret;
8923 xmlParserAddNodeInfo(ctxt, &node_info);
8924 }
8925 return;
8926 }
8927 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008928 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008929 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008930 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8931 "Couldn't find end of Start Tag %s line %d\n",
8932 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008933
8934 /*
8935 * end of parsing of this node.
8936 */
8937 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008938 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008939 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008940 if (nsNr != ctxt->nsNr)
8941 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008942
8943 /*
8944 * Capture end position and add node
8945 */
8946 if ( ret != NULL && ctxt->record_info ) {
8947 node_info.end_pos = ctxt->input->consumed +
8948 (CUR_PTR - ctxt->input->base);
8949 node_info.end_line = ctxt->input->line;
8950 node_info.node = ret;
8951 xmlParserAddNodeInfo(ctxt, &node_info);
8952 }
8953 return;
8954 }
8955
8956 /*
8957 * Parse the content of the element:
8958 */
8959 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008960 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008961 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008962 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008963 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008964
8965 /*
8966 * end of parsing of this node.
8967 */
8968 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008969 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008970 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008971 if (nsNr != ctxt->nsNr)
8972 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008973 return;
8974 }
8975
8976 /*
8977 * parse the end of tag: '</' should be here.
8978 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008979 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008980 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008981 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008982 }
8983#ifdef LIBXML_SAX1_ENABLED
8984 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008985 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008986#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008987
8988 /*
8989 * Capture end position and add node
8990 */
8991 if ( ret != NULL && ctxt->record_info ) {
8992 node_info.end_pos = ctxt->input->consumed +
8993 (CUR_PTR - ctxt->input->base);
8994 node_info.end_line = ctxt->input->line;
8995 node_info.node = ret;
8996 xmlParserAddNodeInfo(ctxt, &node_info);
8997 }
8998}
8999
9000/**
9001 * xmlParseVersionNum:
9002 * @ctxt: an XML parser context
9003 *
9004 * parse the XML version value.
9005 *
9006 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
9007 *
9008 * Returns the string giving the XML version number, or NULL
9009 */
9010xmlChar *
9011xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9012 xmlChar *buf = NULL;
9013 int len = 0;
9014 int size = 10;
9015 xmlChar cur;
9016
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009017 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009018 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009019 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009020 return(NULL);
9021 }
9022 cur = CUR;
9023 while (((cur >= 'a') && (cur <= 'z')) ||
9024 ((cur >= 'A') && (cur <= 'Z')) ||
9025 ((cur >= '0') && (cur <= '9')) ||
9026 (cur == '_') || (cur == '.') ||
9027 (cur == ':') || (cur == '-')) {
9028 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009029 xmlChar *tmp;
9030
Owen Taylor3473f882001-02-23 17:55:21 +00009031 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009032 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9033 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009034 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009035 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009036 return(NULL);
9037 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009038 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009039 }
9040 buf[len++] = cur;
9041 NEXT;
9042 cur=CUR;
9043 }
9044 buf[len] = 0;
9045 return(buf);
9046}
9047
9048/**
9049 * xmlParseVersionInfo:
9050 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009051 *
Owen Taylor3473f882001-02-23 17:55:21 +00009052 * parse the XML version.
9053 *
9054 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009055 *
Owen Taylor3473f882001-02-23 17:55:21 +00009056 * [25] Eq ::= S? '=' S?
9057 *
9058 * Returns the version string, e.g. "1.0"
9059 */
9060
9061xmlChar *
9062xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9063 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009064
Daniel Veillarda07050d2003-10-19 14:46:32 +00009065 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009066 SKIP(7);
9067 SKIP_BLANKS;
9068 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009069 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009070 return(NULL);
9071 }
9072 NEXT;
9073 SKIP_BLANKS;
9074 if (RAW == '"') {
9075 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009076 version = xmlParseVersionNum(ctxt);
9077 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009078 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009079 } else
9080 NEXT;
9081 } else if (RAW == '\''){
9082 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009083 version = xmlParseVersionNum(ctxt);
9084 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009085 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009086 } else
9087 NEXT;
9088 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009089 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009090 }
9091 }
9092 return(version);
9093}
9094
9095/**
9096 * xmlParseEncName:
9097 * @ctxt: an XML parser context
9098 *
9099 * parse the XML encoding name
9100 *
9101 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9102 *
9103 * Returns the encoding name value or NULL
9104 */
9105xmlChar *
9106xmlParseEncName(xmlParserCtxtPtr ctxt) {
9107 xmlChar *buf = NULL;
9108 int len = 0;
9109 int size = 10;
9110 xmlChar cur;
9111
9112 cur = CUR;
9113 if (((cur >= 'a') && (cur <= 'z')) ||
9114 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009115 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009116 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009117 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009118 return(NULL);
9119 }
9120
9121 buf[len++] = cur;
9122 NEXT;
9123 cur = CUR;
9124 while (((cur >= 'a') && (cur <= 'z')) ||
9125 ((cur >= 'A') && (cur <= 'Z')) ||
9126 ((cur >= '0') && (cur <= '9')) ||
9127 (cur == '.') || (cur == '_') ||
9128 (cur == '-')) {
9129 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009130 xmlChar *tmp;
9131
Owen Taylor3473f882001-02-23 17:55:21 +00009132 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009133 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9134 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009135 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009136 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009137 return(NULL);
9138 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009139 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009140 }
9141 buf[len++] = cur;
9142 NEXT;
9143 cur = CUR;
9144 if (cur == 0) {
9145 SHRINK;
9146 GROW;
9147 cur = CUR;
9148 }
9149 }
9150 buf[len] = 0;
9151 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009152 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009153 }
9154 return(buf);
9155}
9156
9157/**
9158 * xmlParseEncodingDecl:
9159 * @ctxt: an XML parser context
9160 *
9161 * parse the XML encoding declaration
9162 *
9163 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9164 *
9165 * this setups the conversion filters.
9166 *
9167 * Returns the encoding value or NULL
9168 */
9169
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009170const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009171xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9172 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009173
9174 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009175 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009176 SKIP(8);
9177 SKIP_BLANKS;
9178 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009179 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009180 return(NULL);
9181 }
9182 NEXT;
9183 SKIP_BLANKS;
9184 if (RAW == '"') {
9185 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009186 encoding = xmlParseEncName(ctxt);
9187 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009188 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009189 } else
9190 NEXT;
9191 } else if (RAW == '\''){
9192 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009193 encoding = xmlParseEncName(ctxt);
9194 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009195 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009196 } else
9197 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009198 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009199 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009200 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009201 /*
9202 * UTF-16 encoding stwich has already taken place at this stage,
9203 * more over the little-endian/big-endian selection is already done
9204 */
9205 if ((encoding != NULL) &&
9206 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9207 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009208 if (ctxt->encoding != NULL)
9209 xmlFree((xmlChar *) ctxt->encoding);
9210 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009211 }
9212 /*
9213 * UTF-8 encoding is handled natively
9214 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009215 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009216 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9217 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009218 if (ctxt->encoding != NULL)
9219 xmlFree((xmlChar *) ctxt->encoding);
9220 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009221 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009222 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009223 xmlCharEncodingHandlerPtr handler;
9224
9225 if (ctxt->input->encoding != NULL)
9226 xmlFree((xmlChar *) ctxt->input->encoding);
9227 ctxt->input->encoding = encoding;
9228
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009229 handler = xmlFindCharEncodingHandler((const char *) encoding);
9230 if (handler != NULL) {
9231 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009232 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009233 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009234 "Unsupported encoding %s\n", encoding);
9235 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009236 }
9237 }
9238 }
9239 return(encoding);
9240}
9241
9242/**
9243 * xmlParseSDDecl:
9244 * @ctxt: an XML parser context
9245 *
9246 * parse the XML standalone declaration
9247 *
9248 * [32] SDDecl ::= S 'standalone' Eq
9249 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9250 *
9251 * [ VC: Standalone Document Declaration ]
9252 * TODO The standalone document declaration must have the value "no"
9253 * if any external markup declarations contain declarations of:
9254 * - attributes with default values, if elements to which these
9255 * attributes apply appear in the document without specifications
9256 * of values for these attributes, or
9257 * - entities (other than amp, lt, gt, apos, quot), if references
9258 * to those entities appear in the document, or
9259 * - attributes with values subject to normalization, where the
9260 * attribute appears in the document with a value which will change
9261 * as a result of normalization, or
9262 * - element types with element content, if white space occurs directly
9263 * within any instance of those types.
9264 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009265 * Returns:
9266 * 1 if standalone="yes"
9267 * 0 if standalone="no"
9268 * -2 if standalone attribute is missing or invalid
9269 * (A standalone value of -2 means that the XML declaration was found,
9270 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009271 */
9272
9273int
9274xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009275 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009276
9277 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009278 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009279 SKIP(10);
9280 SKIP_BLANKS;
9281 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009282 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009283 return(standalone);
9284 }
9285 NEXT;
9286 SKIP_BLANKS;
9287 if (RAW == '\''){
9288 NEXT;
9289 if ((RAW == 'n') && (NXT(1) == 'o')) {
9290 standalone = 0;
9291 SKIP(2);
9292 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9293 (NXT(2) == 's')) {
9294 standalone = 1;
9295 SKIP(3);
9296 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009297 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009298 }
9299 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009300 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009301 } else
9302 NEXT;
9303 } else if (RAW == '"'){
9304 NEXT;
9305 if ((RAW == 'n') && (NXT(1) == 'o')) {
9306 standalone = 0;
9307 SKIP(2);
9308 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9309 (NXT(2) == 's')) {
9310 standalone = 1;
9311 SKIP(3);
9312 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009313 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009314 }
9315 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009316 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009317 } else
9318 NEXT;
9319 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009320 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009321 }
9322 }
9323 return(standalone);
9324}
9325
9326/**
9327 * xmlParseXMLDecl:
9328 * @ctxt: an XML parser context
9329 *
9330 * parse an XML declaration header
9331 *
9332 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9333 */
9334
9335void
9336xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9337 xmlChar *version;
9338
9339 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009340 * This value for standalone indicates that the document has an
9341 * XML declaration but it does not have a standalone attribute.
9342 * It will be overwritten later if a standalone attribute is found.
9343 */
9344 ctxt->input->standalone = -2;
9345
9346 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009347 * We know that '<?xml' is here.
9348 */
9349 SKIP(5);
9350
William M. Brack76e95df2003-10-18 16:20:14 +00009351 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009352 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9353 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009354 }
9355 SKIP_BLANKS;
9356
9357 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009358 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009359 */
9360 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009361 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009362 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009363 } else {
9364 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9365 /*
9366 * TODO: Blueberry should be detected here
9367 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009368 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9369 "Unsupported version '%s'\n",
9370 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009371 }
9372 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009373 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009374 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009375 }
Owen Taylor3473f882001-02-23 17:55:21 +00009376
9377 /*
9378 * We may have the encoding declaration
9379 */
William M. Brack76e95df2003-10-18 16:20:14 +00009380 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009381 if ((RAW == '?') && (NXT(1) == '>')) {
9382 SKIP(2);
9383 return;
9384 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009386 }
9387 xmlParseEncodingDecl(ctxt);
9388 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9389 /*
9390 * The XML REC instructs us to stop parsing right here
9391 */
9392 return;
9393 }
9394
9395 /*
9396 * We may have the standalone status.
9397 */
William M. Brack76e95df2003-10-18 16:20:14 +00009398 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009399 if ((RAW == '?') && (NXT(1) == '>')) {
9400 SKIP(2);
9401 return;
9402 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009403 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009404 }
9405 SKIP_BLANKS;
9406 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9407
9408 SKIP_BLANKS;
9409 if ((RAW == '?') && (NXT(1) == '>')) {
9410 SKIP(2);
9411 } else if (RAW == '>') {
9412 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009413 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009414 NEXT;
9415 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009416 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009417 MOVETO_ENDTAG(CUR_PTR);
9418 NEXT;
9419 }
9420}
9421
9422/**
9423 * xmlParseMisc:
9424 * @ctxt: an XML parser context
9425 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009426 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009427 *
9428 * [27] Misc ::= Comment | PI | S
9429 */
9430
9431void
9432xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009433 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009434 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009435 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009436 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009437 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009438 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009439 NEXT;
9440 } else
9441 xmlParseComment(ctxt);
9442 }
9443}
9444
9445/**
9446 * xmlParseDocument:
9447 * @ctxt: an XML parser context
9448 *
9449 * parse an XML document (and build a tree if using the standard SAX
9450 * interface).
9451 *
9452 * [1] document ::= prolog element Misc*
9453 *
9454 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9455 *
9456 * Returns 0, -1 in case of error. the parser context is augmented
9457 * as a result of the parsing.
9458 */
9459
9460int
9461xmlParseDocument(xmlParserCtxtPtr ctxt) {
9462 xmlChar start[4];
9463 xmlCharEncoding enc;
9464
9465 xmlInitParser();
9466
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009467 if ((ctxt == NULL) || (ctxt->input == NULL))
9468 return(-1);
9469
Owen Taylor3473f882001-02-23 17:55:21 +00009470 GROW;
9471
9472 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009473 * SAX: detecting the level.
9474 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009475 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009476
9477 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009478 * SAX: beginning of the document processing.
9479 */
9480 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9481 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9482
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009483 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9484 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009485 /*
9486 * Get the 4 first bytes and decode the charset
9487 * if enc != XML_CHAR_ENCODING_NONE
9488 * plug some encoding conversion routines.
9489 */
9490 start[0] = RAW;
9491 start[1] = NXT(1);
9492 start[2] = NXT(2);
9493 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009494 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009495 if (enc != XML_CHAR_ENCODING_NONE) {
9496 xmlSwitchEncoding(ctxt, enc);
9497 }
Owen Taylor3473f882001-02-23 17:55:21 +00009498 }
9499
9500
9501 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009502 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009503 }
9504
9505 /*
9506 * Check for the XMLDecl in the Prolog.
9507 */
9508 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009509 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009510
9511 /*
9512 * Note that we will switch encoding on the fly.
9513 */
9514 xmlParseXMLDecl(ctxt);
9515 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9516 /*
9517 * The XML REC instructs us to stop parsing right here
9518 */
9519 return(-1);
9520 }
9521 ctxt->standalone = ctxt->input->standalone;
9522 SKIP_BLANKS;
9523 } else {
9524 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9525 }
9526 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9527 ctxt->sax->startDocument(ctxt->userData);
9528
9529 /*
9530 * The Misc part of the Prolog
9531 */
9532 GROW;
9533 xmlParseMisc(ctxt);
9534
9535 /*
9536 * Then possibly doc type declaration(s) and more Misc
9537 * (doctypedecl Misc*)?
9538 */
9539 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009540 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009541
9542 ctxt->inSubset = 1;
9543 xmlParseDocTypeDecl(ctxt);
9544 if (RAW == '[') {
9545 ctxt->instate = XML_PARSER_DTD;
9546 xmlParseInternalSubset(ctxt);
9547 }
9548
9549 /*
9550 * Create and update the external subset.
9551 */
9552 ctxt->inSubset = 2;
9553 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9554 (!ctxt->disableSAX))
9555 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9556 ctxt->extSubSystem, ctxt->extSubURI);
9557 ctxt->inSubset = 0;
9558
Daniel Veillardac4118d2008-01-11 05:27:32 +00009559 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009560
9561 ctxt->instate = XML_PARSER_PROLOG;
9562 xmlParseMisc(ctxt);
9563 }
9564
9565 /*
9566 * Time to start parsing the tree itself
9567 */
9568 GROW;
9569 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009570 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9571 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009572 } else {
9573 ctxt->instate = XML_PARSER_CONTENT;
9574 xmlParseElement(ctxt);
9575 ctxt->instate = XML_PARSER_EPILOG;
9576
9577
9578 /*
9579 * The Misc part at the end
9580 */
9581 xmlParseMisc(ctxt);
9582
Daniel Veillard561b7f82002-03-20 21:55:57 +00009583 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009584 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009585 }
9586 ctxt->instate = XML_PARSER_EOF;
9587 }
9588
9589 /*
9590 * SAX: end of the document processing.
9591 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009592 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009593 ctxt->sax->endDocument(ctxt->userData);
9594
Daniel Veillard5997aca2002-03-18 18:36:20 +00009595 /*
9596 * Remove locally kept entity definitions if the tree was not built
9597 */
9598 if ((ctxt->myDoc != NULL) &&
9599 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9600 xmlFreeDoc(ctxt->myDoc);
9601 ctxt->myDoc = NULL;
9602 }
9603
Daniel Veillardc7612992002-02-17 22:47:37 +00009604 if (! ctxt->wellFormed) {
9605 ctxt->valid = 0;
9606 return(-1);
9607 }
Owen Taylor3473f882001-02-23 17:55:21 +00009608 return(0);
9609}
9610
9611/**
9612 * xmlParseExtParsedEnt:
9613 * @ctxt: an XML parser context
9614 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009615 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009616 * An external general parsed entity is well-formed if it matches the
9617 * production labeled extParsedEnt.
9618 *
9619 * [78] extParsedEnt ::= TextDecl? content
9620 *
9621 * Returns 0, -1 in case of error. the parser context is augmented
9622 * as a result of the parsing.
9623 */
9624
9625int
9626xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9627 xmlChar start[4];
9628 xmlCharEncoding enc;
9629
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009630 if ((ctxt == NULL) || (ctxt->input == NULL))
9631 return(-1);
9632
Owen Taylor3473f882001-02-23 17:55:21 +00009633 xmlDefaultSAXHandlerInit();
9634
Daniel Veillard309f81d2003-09-23 09:02:53 +00009635 xmlDetectSAX2(ctxt);
9636
Owen Taylor3473f882001-02-23 17:55:21 +00009637 GROW;
9638
9639 /*
9640 * SAX: beginning of the document processing.
9641 */
9642 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9643 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9644
9645 /*
9646 * Get the 4 first bytes and decode the charset
9647 * if enc != XML_CHAR_ENCODING_NONE
9648 * plug some encoding conversion routines.
9649 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009650 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9651 start[0] = RAW;
9652 start[1] = NXT(1);
9653 start[2] = NXT(2);
9654 start[3] = NXT(3);
9655 enc = xmlDetectCharEncoding(start, 4);
9656 if (enc != XML_CHAR_ENCODING_NONE) {
9657 xmlSwitchEncoding(ctxt, enc);
9658 }
Owen Taylor3473f882001-02-23 17:55:21 +00009659 }
9660
9661
9662 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009663 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009664 }
9665
9666 /*
9667 * Check for the XMLDecl in the Prolog.
9668 */
9669 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009670 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009671
9672 /*
9673 * Note that we will switch encoding on the fly.
9674 */
9675 xmlParseXMLDecl(ctxt);
9676 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9677 /*
9678 * The XML REC instructs us to stop parsing right here
9679 */
9680 return(-1);
9681 }
9682 SKIP_BLANKS;
9683 } else {
9684 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9685 }
9686 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9687 ctxt->sax->startDocument(ctxt->userData);
9688
9689 /*
9690 * Doing validity checking on chunk doesn't make sense
9691 */
9692 ctxt->instate = XML_PARSER_CONTENT;
9693 ctxt->validate = 0;
9694 ctxt->loadsubset = 0;
9695 ctxt->depth = 0;
9696
9697 xmlParseContent(ctxt);
9698
9699 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009700 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009701 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009702 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009703 }
9704
9705 /*
9706 * SAX: end of the document processing.
9707 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009708 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009709 ctxt->sax->endDocument(ctxt->userData);
9710
9711 if (! ctxt->wellFormed) return(-1);
9712 return(0);
9713}
9714
Daniel Veillard73b013f2003-09-30 12:36:01 +00009715#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009716/************************************************************************
9717 * *
9718 * Progressive parsing interfaces *
9719 * *
9720 ************************************************************************/
9721
9722/**
9723 * xmlParseLookupSequence:
9724 * @ctxt: an XML parser context
9725 * @first: the first char to lookup
9726 * @next: the next char to lookup or zero
9727 * @third: the next char to lookup or zero
9728 *
9729 * Try to find if a sequence (first, next, third) or just (first next) or
9730 * (first) is available in the input stream.
9731 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9732 * to avoid rescanning sequences of bytes, it DOES change the state of the
9733 * parser, do not use liberally.
9734 *
9735 * Returns the index to the current parsing point if the full sequence
9736 * is available, -1 otherwise.
9737 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009738static int
Owen Taylor3473f882001-02-23 17:55:21 +00009739xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9740 xmlChar next, xmlChar third) {
9741 int base, len;
9742 xmlParserInputPtr in;
9743 const xmlChar *buf;
9744
9745 in = ctxt->input;
9746 if (in == NULL) return(-1);
9747 base = in->cur - in->base;
9748 if (base < 0) return(-1);
9749 if (ctxt->checkIndex > base)
9750 base = ctxt->checkIndex;
9751 if (in->buf == NULL) {
9752 buf = in->base;
9753 len = in->length;
9754 } else {
9755 buf = in->buf->buffer->content;
9756 len = in->buf->buffer->use;
9757 }
9758 /* take into account the sequence length */
9759 if (third) len -= 2;
9760 else if (next) len --;
9761 for (;base < len;base++) {
9762 if (buf[base] == first) {
9763 if (third != 0) {
9764 if ((buf[base + 1] != next) ||
9765 (buf[base + 2] != third)) continue;
9766 } else if (next != 0) {
9767 if (buf[base + 1] != next) continue;
9768 }
9769 ctxt->checkIndex = 0;
9770#ifdef DEBUG_PUSH
9771 if (next == 0)
9772 xmlGenericError(xmlGenericErrorContext,
9773 "PP: lookup '%c' found at %d\n",
9774 first, base);
9775 else if (third == 0)
9776 xmlGenericError(xmlGenericErrorContext,
9777 "PP: lookup '%c%c' found at %d\n",
9778 first, next, base);
9779 else
9780 xmlGenericError(xmlGenericErrorContext,
9781 "PP: lookup '%c%c%c' found at %d\n",
9782 first, next, third, base);
9783#endif
9784 return(base - (in->cur - in->base));
9785 }
9786 }
9787 ctxt->checkIndex = base;
9788#ifdef DEBUG_PUSH
9789 if (next == 0)
9790 xmlGenericError(xmlGenericErrorContext,
9791 "PP: lookup '%c' failed\n", first);
9792 else if (third == 0)
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: lookup '%c%c' failed\n", first, next);
9795 else
9796 xmlGenericError(xmlGenericErrorContext,
9797 "PP: lookup '%c%c%c' failed\n", first, next, third);
9798#endif
9799 return(-1);
9800}
9801
9802/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009803 * xmlParseGetLasts:
9804 * @ctxt: an XML parser context
9805 * @lastlt: pointer to store the last '<' from the input
9806 * @lastgt: pointer to store the last '>' from the input
9807 *
9808 * Lookup the last < and > in the current chunk
9809 */
9810static void
9811xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9812 const xmlChar **lastgt) {
9813 const xmlChar *tmp;
9814
9815 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9816 xmlGenericError(xmlGenericErrorContext,
9817 "Internal error: xmlParseGetLasts\n");
9818 return;
9819 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009820 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009821 tmp = ctxt->input->end;
9822 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009823 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009824 if (tmp < ctxt->input->base) {
9825 *lastlt = NULL;
9826 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009827 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009828 *lastlt = tmp;
9829 tmp++;
9830 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9831 if (*tmp == '\'') {
9832 tmp++;
9833 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9834 if (tmp < ctxt->input->end) tmp++;
9835 } else if (*tmp == '"') {
9836 tmp++;
9837 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9838 if (tmp < ctxt->input->end) tmp++;
9839 } else
9840 tmp++;
9841 }
9842 if (tmp < ctxt->input->end)
9843 *lastgt = tmp;
9844 else {
9845 tmp = *lastlt;
9846 tmp--;
9847 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9848 if (tmp >= ctxt->input->base)
9849 *lastgt = tmp;
9850 else
9851 *lastgt = NULL;
9852 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009853 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009854 } else {
9855 *lastlt = NULL;
9856 *lastgt = NULL;
9857 }
9858}
9859/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009860 * xmlCheckCdataPush:
9861 * @cur: pointer to the bock of characters
9862 * @len: length of the block in bytes
9863 *
9864 * Check that the block of characters is okay as SCdata content [20]
9865 *
9866 * Returns the number of bytes to pass if okay, a negative index where an
9867 * UTF-8 error occured otherwise
9868 */
9869static int
9870xmlCheckCdataPush(const xmlChar *utf, int len) {
9871 int ix;
9872 unsigned char c;
9873 int codepoint;
9874
9875 if ((utf == NULL) || (len <= 0))
9876 return(0);
9877
9878 for (ix = 0; ix < len;) { /* string is 0-terminated */
9879 c = utf[ix];
9880 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9881 if (c >= 0x20)
9882 ix++;
9883 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9884 ix++;
9885 else
9886 return(-ix);
9887 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9888 if (ix + 2 > len) return(ix);
9889 if ((utf[ix+1] & 0xc0 ) != 0x80)
9890 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009891 codepoint = (utf[ix] & 0x1f) << 6;
9892 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009893 if (!xmlIsCharQ(codepoint))
9894 return(-ix);
9895 ix += 2;
9896 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9897 if (ix + 3 > len) return(ix);
9898 if (((utf[ix+1] & 0xc0) != 0x80) ||
9899 ((utf[ix+2] & 0xc0) != 0x80))
9900 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009901 codepoint = (utf[ix] & 0xf) << 12;
9902 codepoint |= (utf[ix+1] & 0x3f) << 6;
9903 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009904 if (!xmlIsCharQ(codepoint))
9905 return(-ix);
9906 ix += 3;
9907 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9908 if (ix + 4 > len) return(ix);
9909 if (((utf[ix+1] & 0xc0) != 0x80) ||
9910 ((utf[ix+2] & 0xc0) != 0x80) ||
9911 ((utf[ix+3] & 0xc0) != 0x80))
9912 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009913 codepoint = (utf[ix] & 0x7) << 18;
9914 codepoint |= (utf[ix+1] & 0x3f) << 12;
9915 codepoint |= (utf[ix+2] & 0x3f) << 6;
9916 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009917 if (!xmlIsCharQ(codepoint))
9918 return(-ix);
9919 ix += 4;
9920 } else /* unknown encoding */
9921 return(-ix);
9922 }
9923 return(ix);
9924}
9925
9926/**
Owen Taylor3473f882001-02-23 17:55:21 +00009927 * xmlParseTryOrFinish:
9928 * @ctxt: an XML parser context
9929 * @terminate: last chunk indicator
9930 *
9931 * Try to progress on parsing
9932 *
9933 * Returns zero if no parsing was possible
9934 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009935static int
Owen Taylor3473f882001-02-23 17:55:21 +00009936xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9937 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009938 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009939 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009940 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009941
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009942 if (ctxt->input == NULL)
9943 return(0);
9944
Owen Taylor3473f882001-02-23 17:55:21 +00009945#ifdef DEBUG_PUSH
9946 switch (ctxt->instate) {
9947 case XML_PARSER_EOF:
9948 xmlGenericError(xmlGenericErrorContext,
9949 "PP: try EOF\n"); break;
9950 case XML_PARSER_START:
9951 xmlGenericError(xmlGenericErrorContext,
9952 "PP: try START\n"); break;
9953 case XML_PARSER_MISC:
9954 xmlGenericError(xmlGenericErrorContext,
9955 "PP: try MISC\n");break;
9956 case XML_PARSER_COMMENT:
9957 xmlGenericError(xmlGenericErrorContext,
9958 "PP: try COMMENT\n");break;
9959 case XML_PARSER_PROLOG:
9960 xmlGenericError(xmlGenericErrorContext,
9961 "PP: try PROLOG\n");break;
9962 case XML_PARSER_START_TAG:
9963 xmlGenericError(xmlGenericErrorContext,
9964 "PP: try START_TAG\n");break;
9965 case XML_PARSER_CONTENT:
9966 xmlGenericError(xmlGenericErrorContext,
9967 "PP: try CONTENT\n");break;
9968 case XML_PARSER_CDATA_SECTION:
9969 xmlGenericError(xmlGenericErrorContext,
9970 "PP: try CDATA_SECTION\n");break;
9971 case XML_PARSER_END_TAG:
9972 xmlGenericError(xmlGenericErrorContext,
9973 "PP: try END_TAG\n");break;
9974 case XML_PARSER_ENTITY_DECL:
9975 xmlGenericError(xmlGenericErrorContext,
9976 "PP: try ENTITY_DECL\n");break;
9977 case XML_PARSER_ENTITY_VALUE:
9978 xmlGenericError(xmlGenericErrorContext,
9979 "PP: try ENTITY_VALUE\n");break;
9980 case XML_PARSER_ATTRIBUTE_VALUE:
9981 xmlGenericError(xmlGenericErrorContext,
9982 "PP: try ATTRIBUTE_VALUE\n");break;
9983 case XML_PARSER_DTD:
9984 xmlGenericError(xmlGenericErrorContext,
9985 "PP: try DTD\n");break;
9986 case XML_PARSER_EPILOG:
9987 xmlGenericError(xmlGenericErrorContext,
9988 "PP: try EPILOG\n");break;
9989 case XML_PARSER_PI:
9990 xmlGenericError(xmlGenericErrorContext,
9991 "PP: try PI\n");break;
9992 case XML_PARSER_IGNORE:
9993 xmlGenericError(xmlGenericErrorContext,
9994 "PP: try IGNORE\n");break;
9995 }
9996#endif
9997
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009998 if ((ctxt->input != NULL) &&
9999 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010000 xmlSHRINK(ctxt);
10001 ctxt->checkIndex = 0;
10002 }
10003 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010004
Daniel Veillarda880b122003-04-21 21:36:41 +000010005 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010006 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010007 return(0);
10008
10009
Owen Taylor3473f882001-02-23 17:55:21 +000010010 /*
10011 * Pop-up of finished entities.
10012 */
10013 while ((RAW == 0) && (ctxt->inputNr > 1))
10014 xmlPopInput(ctxt);
10015
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010016 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010017 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010018 avail = ctxt->input->length -
10019 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010020 else {
10021 /*
10022 * If we are operating on converted input, try to flush
10023 * remainng chars to avoid them stalling in the non-converted
10024 * buffer.
10025 */
10026 if ((ctxt->input->buf->raw != NULL) &&
10027 (ctxt->input->buf->raw->use > 0)) {
10028 int base = ctxt->input->base -
10029 ctxt->input->buf->buffer->content;
10030 int current = ctxt->input->cur - ctxt->input->base;
10031
10032 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10033 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10034 ctxt->input->cur = ctxt->input->base + current;
10035 ctxt->input->end =
10036 &ctxt->input->buf->buffer->content[
10037 ctxt->input->buf->buffer->use];
10038 }
10039 avail = ctxt->input->buf->buffer->use -
10040 (ctxt->input->cur - ctxt->input->base);
10041 }
Owen Taylor3473f882001-02-23 17:55:21 +000010042 if (avail < 1)
10043 goto done;
10044 switch (ctxt->instate) {
10045 case XML_PARSER_EOF:
10046 /*
10047 * Document parsing is done !
10048 */
10049 goto done;
10050 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010051 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10052 xmlChar start[4];
10053 xmlCharEncoding enc;
10054
10055 /*
10056 * Very first chars read from the document flow.
10057 */
10058 if (avail < 4)
10059 goto done;
10060
10061 /*
10062 * Get the 4 first bytes and decode the charset
10063 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010064 * plug some encoding conversion routines,
10065 * else xmlSwitchEncoding will set to (default)
10066 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010067 */
10068 start[0] = RAW;
10069 start[1] = NXT(1);
10070 start[2] = NXT(2);
10071 start[3] = NXT(3);
10072 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010073 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010074 break;
10075 }
Owen Taylor3473f882001-02-23 17:55:21 +000010076
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010077 if (avail < 2)
10078 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010079 cur = ctxt->input->cur[0];
10080 next = ctxt->input->cur[1];
10081 if (cur == 0) {
10082 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10083 ctxt->sax->setDocumentLocator(ctxt->userData,
10084 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010085 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010086 ctxt->instate = XML_PARSER_EOF;
10087#ifdef DEBUG_PUSH
10088 xmlGenericError(xmlGenericErrorContext,
10089 "PP: entering EOF\n");
10090#endif
10091 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10092 ctxt->sax->endDocument(ctxt->userData);
10093 goto done;
10094 }
10095 if ((cur == '<') && (next == '?')) {
10096 /* PI or XML decl */
10097 if (avail < 5) return(ret);
10098 if ((!terminate) &&
10099 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10100 return(ret);
10101 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10102 ctxt->sax->setDocumentLocator(ctxt->userData,
10103 &xmlDefaultSAXLocator);
10104 if ((ctxt->input->cur[2] == 'x') &&
10105 (ctxt->input->cur[3] == 'm') &&
10106 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010107 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010108 ret += 5;
10109#ifdef DEBUG_PUSH
10110 xmlGenericError(xmlGenericErrorContext,
10111 "PP: Parsing XML Decl\n");
10112#endif
10113 xmlParseXMLDecl(ctxt);
10114 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10115 /*
10116 * The XML REC instructs us to stop parsing right
10117 * here
10118 */
10119 ctxt->instate = XML_PARSER_EOF;
10120 return(0);
10121 }
10122 ctxt->standalone = ctxt->input->standalone;
10123 if ((ctxt->encoding == NULL) &&
10124 (ctxt->input->encoding != NULL))
10125 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10126 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10127 (!ctxt->disableSAX))
10128 ctxt->sax->startDocument(ctxt->userData);
10129 ctxt->instate = XML_PARSER_MISC;
10130#ifdef DEBUG_PUSH
10131 xmlGenericError(xmlGenericErrorContext,
10132 "PP: entering MISC\n");
10133#endif
10134 } else {
10135 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10136 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10137 (!ctxt->disableSAX))
10138 ctxt->sax->startDocument(ctxt->userData);
10139 ctxt->instate = XML_PARSER_MISC;
10140#ifdef DEBUG_PUSH
10141 xmlGenericError(xmlGenericErrorContext,
10142 "PP: entering MISC\n");
10143#endif
10144 }
10145 } else {
10146 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10147 ctxt->sax->setDocumentLocator(ctxt->userData,
10148 &xmlDefaultSAXLocator);
10149 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010150 if (ctxt->version == NULL) {
10151 xmlErrMemory(ctxt, NULL);
10152 break;
10153 }
Owen Taylor3473f882001-02-23 17:55:21 +000010154 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10155 (!ctxt->disableSAX))
10156 ctxt->sax->startDocument(ctxt->userData);
10157 ctxt->instate = XML_PARSER_MISC;
10158#ifdef DEBUG_PUSH
10159 xmlGenericError(xmlGenericErrorContext,
10160 "PP: entering MISC\n");
10161#endif
10162 }
10163 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010164 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010165 const xmlChar *name;
10166 const xmlChar *prefix;
10167 const xmlChar *URI;
10168 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010169
10170 if ((avail < 2) && (ctxt->inputNr == 1))
10171 goto done;
10172 cur = ctxt->input->cur[0];
10173 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010174 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010175 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010176 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10177 ctxt->sax->endDocument(ctxt->userData);
10178 goto done;
10179 }
10180 if (!terminate) {
10181 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010182 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010183 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010184 goto done;
10185 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10186 goto done;
10187 }
10188 }
10189 if (ctxt->spaceNr == 0)
10190 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010191 else if (*ctxt->space == -2)
10192 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010193 else
10194 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010195#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010196 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010197#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010198 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010199#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010200 else
10201 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010202#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010203 if (name == NULL) {
10204 spacePop(ctxt);
10205 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010206 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10207 ctxt->sax->endDocument(ctxt->userData);
10208 goto done;
10209 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010210#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010211 /*
10212 * [ VC: Root Element Type ]
10213 * The Name in the document type declaration must match
10214 * the element type of the root element.
10215 */
10216 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10217 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10218 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010219#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010220
10221 /*
10222 * Check for an Empty Element.
10223 */
10224 if ((RAW == '/') && (NXT(1) == '>')) {
10225 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010226
10227 if (ctxt->sax2) {
10228 if ((ctxt->sax != NULL) &&
10229 (ctxt->sax->endElementNs != NULL) &&
10230 (!ctxt->disableSAX))
10231 ctxt->sax->endElementNs(ctxt->userData, name,
10232 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010233 if (ctxt->nsNr - nsNr > 0)
10234 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010235#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010236 } else {
10237 if ((ctxt->sax != NULL) &&
10238 (ctxt->sax->endElement != NULL) &&
10239 (!ctxt->disableSAX))
10240 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010241#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010242 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010243 spacePop(ctxt);
10244 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010245 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010246 } else {
10247 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010248 }
10249 break;
10250 }
10251 if (RAW == '>') {
10252 NEXT;
10253 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010254 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010255 "Couldn't find end of Start Tag %s\n",
10256 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010257 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010258 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010259 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010260 if (ctxt->sax2)
10261 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010262#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010263 else
10264 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010265#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010266
Daniel Veillarda880b122003-04-21 21:36:41 +000010267 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010268 break;
10269 }
10270 case XML_PARSER_CONTENT: {
10271 const xmlChar *test;
10272 unsigned int cons;
10273 if ((avail < 2) && (ctxt->inputNr == 1))
10274 goto done;
10275 cur = ctxt->input->cur[0];
10276 next = ctxt->input->cur[1];
10277
10278 test = CUR_PTR;
10279 cons = ctxt->input->consumed;
10280 if ((cur == '<') && (next == '/')) {
10281 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010282 break;
10283 } else if ((cur == '<') && (next == '?')) {
10284 if ((!terminate) &&
10285 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10286 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010287 xmlParsePI(ctxt);
10288 } else if ((cur == '<') && (next != '!')) {
10289 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010290 break;
10291 } else if ((cur == '<') && (next == '!') &&
10292 (ctxt->input->cur[2] == '-') &&
10293 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010294 int term;
10295
10296 if (avail < 4)
10297 goto done;
10298 ctxt->input->cur += 4;
10299 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10300 ctxt->input->cur -= 4;
10301 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010302 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010303 xmlParseComment(ctxt);
10304 ctxt->instate = XML_PARSER_CONTENT;
10305 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10306 (ctxt->input->cur[2] == '[') &&
10307 (ctxt->input->cur[3] == 'C') &&
10308 (ctxt->input->cur[4] == 'D') &&
10309 (ctxt->input->cur[5] == 'A') &&
10310 (ctxt->input->cur[6] == 'T') &&
10311 (ctxt->input->cur[7] == 'A') &&
10312 (ctxt->input->cur[8] == '[')) {
10313 SKIP(9);
10314 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010315 break;
10316 } else if ((cur == '<') && (next == '!') &&
10317 (avail < 9)) {
10318 goto done;
10319 } else if (cur == '&') {
10320 if ((!terminate) &&
10321 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10322 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010323 xmlParseReference(ctxt);
10324 } else {
10325 /* TODO Avoid the extra copy, handle directly !!! */
10326 /*
10327 * Goal of the following test is:
10328 * - minimize calls to the SAX 'character' callback
10329 * when they are mergeable
10330 * - handle an problem for isBlank when we only parse
10331 * a sequence of blank chars and the next one is
10332 * not available to check against '<' presence.
10333 * - tries to homogenize the differences in SAX
10334 * callbacks between the push and pull versions
10335 * of the parser.
10336 */
10337 if ((ctxt->inputNr == 1) &&
10338 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10339 if (!terminate) {
10340 if (ctxt->progressive) {
10341 if ((lastlt == NULL) ||
10342 (ctxt->input->cur > lastlt))
10343 goto done;
10344 } else if (xmlParseLookupSequence(ctxt,
10345 '<', 0, 0) < 0) {
10346 goto done;
10347 }
10348 }
10349 }
10350 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010351 xmlParseCharData(ctxt, 0);
10352 }
10353 /*
10354 * Pop-up of finished entities.
10355 */
10356 while ((RAW == 0) && (ctxt->inputNr > 1))
10357 xmlPopInput(ctxt);
10358 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010359 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10360 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010361 ctxt->instate = XML_PARSER_EOF;
10362 break;
10363 }
10364 break;
10365 }
10366 case XML_PARSER_END_TAG:
10367 if (avail < 2)
10368 goto done;
10369 if (!terminate) {
10370 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010371 /* > can be found unescaped in attribute values */
10372 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010373 goto done;
10374 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10375 goto done;
10376 }
10377 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010378 if (ctxt->sax2) {
10379 xmlParseEndTag2(ctxt,
10380 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10381 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010382 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010383 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010384 }
10385#ifdef LIBXML_SAX1_ENABLED
10386 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010387 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010388#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010389 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010390 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010391 } else {
10392 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010393 }
10394 break;
10395 case XML_PARSER_CDATA_SECTION: {
10396 /*
10397 * The Push mode need to have the SAX callback for
10398 * cdataBlock merge back contiguous callbacks.
10399 */
10400 int base;
10401
10402 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10403 if (base < 0) {
10404 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010405 int tmp;
10406
10407 tmp = xmlCheckCdataPush(ctxt->input->cur,
10408 XML_PARSER_BIG_BUFFER_SIZE);
10409 if (tmp < 0) {
10410 tmp = -tmp;
10411 ctxt->input->cur += tmp;
10412 goto encoding_error;
10413 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010414 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10415 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010416 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010417 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010418 else if (ctxt->sax->characters != NULL)
10419 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010420 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010421 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010422 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010423 ctxt->checkIndex = 0;
10424 }
10425 goto done;
10426 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010427 int tmp;
10428
10429 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10430 if ((tmp < 0) || (tmp != base)) {
10431 tmp = -tmp;
10432 ctxt->input->cur += tmp;
10433 goto encoding_error;
10434 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010435 if ((ctxt->sax != NULL) && (base == 0) &&
10436 (ctxt->sax->cdataBlock != NULL) &&
10437 (!ctxt->disableSAX)) {
10438 /*
10439 * Special case to provide identical behaviour
10440 * between pull and push parsers on enpty CDATA
10441 * sections
10442 */
10443 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10444 (!strncmp((const char *)&ctxt->input->cur[-9],
10445 "<![CDATA[", 9)))
10446 ctxt->sax->cdataBlock(ctxt->userData,
10447 BAD_CAST "", 0);
10448 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010449 (!ctxt->disableSAX)) {
10450 if (ctxt->sax->cdataBlock != NULL)
10451 ctxt->sax->cdataBlock(ctxt->userData,
10452 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010453 else if (ctxt->sax->characters != NULL)
10454 ctxt->sax->characters(ctxt->userData,
10455 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010456 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010457 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010458 ctxt->checkIndex = 0;
10459 ctxt->instate = XML_PARSER_CONTENT;
10460#ifdef DEBUG_PUSH
10461 xmlGenericError(xmlGenericErrorContext,
10462 "PP: entering CONTENT\n");
10463#endif
10464 }
10465 break;
10466 }
Owen Taylor3473f882001-02-23 17:55:21 +000010467 case XML_PARSER_MISC:
10468 SKIP_BLANKS;
10469 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010470 avail = ctxt->input->length -
10471 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010472 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010473 avail = ctxt->input->buf->buffer->use -
10474 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 if (avail < 2)
10476 goto done;
10477 cur = ctxt->input->cur[0];
10478 next = ctxt->input->cur[1];
10479 if ((cur == '<') && (next == '?')) {
10480 if ((!terminate) &&
10481 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10482 goto done;
10483#ifdef DEBUG_PUSH
10484 xmlGenericError(xmlGenericErrorContext,
10485 "PP: Parsing PI\n");
10486#endif
10487 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010488 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010489 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010490 (ctxt->input->cur[2] == '-') &&
10491 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010492 if ((!terminate) &&
10493 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10494 goto done;
10495#ifdef DEBUG_PUSH
10496 xmlGenericError(xmlGenericErrorContext,
10497 "PP: Parsing Comment\n");
10498#endif
10499 xmlParseComment(ctxt);
10500 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010501 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010502 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010503 (ctxt->input->cur[2] == 'D') &&
10504 (ctxt->input->cur[3] == 'O') &&
10505 (ctxt->input->cur[4] == 'C') &&
10506 (ctxt->input->cur[5] == 'T') &&
10507 (ctxt->input->cur[6] == 'Y') &&
10508 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010509 (ctxt->input->cur[8] == 'E')) {
10510 if ((!terminate) &&
10511 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10512 goto done;
10513#ifdef DEBUG_PUSH
10514 xmlGenericError(xmlGenericErrorContext,
10515 "PP: Parsing internal subset\n");
10516#endif
10517 ctxt->inSubset = 1;
10518 xmlParseDocTypeDecl(ctxt);
10519 if (RAW == '[') {
10520 ctxt->instate = XML_PARSER_DTD;
10521#ifdef DEBUG_PUSH
10522 xmlGenericError(xmlGenericErrorContext,
10523 "PP: entering DTD\n");
10524#endif
10525 } else {
10526 /*
10527 * Create and update the external subset.
10528 */
10529 ctxt->inSubset = 2;
10530 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10531 (ctxt->sax->externalSubset != NULL))
10532 ctxt->sax->externalSubset(ctxt->userData,
10533 ctxt->intSubName, ctxt->extSubSystem,
10534 ctxt->extSubURI);
10535 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010536 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010537 ctxt->instate = XML_PARSER_PROLOG;
10538#ifdef DEBUG_PUSH
10539 xmlGenericError(xmlGenericErrorContext,
10540 "PP: entering PROLOG\n");
10541#endif
10542 }
10543 } else if ((cur == '<') && (next == '!') &&
10544 (avail < 9)) {
10545 goto done;
10546 } else {
10547 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010548 ctxt->progressive = 1;
10549 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010550#ifdef DEBUG_PUSH
10551 xmlGenericError(xmlGenericErrorContext,
10552 "PP: entering START_TAG\n");
10553#endif
10554 }
10555 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010556 case XML_PARSER_PROLOG:
10557 SKIP_BLANKS;
10558 if (ctxt->input->buf == NULL)
10559 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10560 else
10561 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10562 if (avail < 2)
10563 goto done;
10564 cur = ctxt->input->cur[0];
10565 next = ctxt->input->cur[1];
10566 if ((cur == '<') && (next == '?')) {
10567 if ((!terminate) &&
10568 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10569 goto done;
10570#ifdef DEBUG_PUSH
10571 xmlGenericError(xmlGenericErrorContext,
10572 "PP: Parsing PI\n");
10573#endif
10574 xmlParsePI(ctxt);
10575 } else if ((cur == '<') && (next == '!') &&
10576 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10577 if ((!terminate) &&
10578 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10579 goto done;
10580#ifdef DEBUG_PUSH
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: Parsing Comment\n");
10583#endif
10584 xmlParseComment(ctxt);
10585 ctxt->instate = XML_PARSER_PROLOG;
10586 } else if ((cur == '<') && (next == '!') &&
10587 (avail < 4)) {
10588 goto done;
10589 } else {
10590 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010591 if (ctxt->progressive == 0)
10592 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010593 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010594#ifdef DEBUG_PUSH
10595 xmlGenericError(xmlGenericErrorContext,
10596 "PP: entering START_TAG\n");
10597#endif
10598 }
10599 break;
10600 case XML_PARSER_EPILOG:
10601 SKIP_BLANKS;
10602 if (ctxt->input->buf == NULL)
10603 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10604 else
10605 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10606 if (avail < 2)
10607 goto done;
10608 cur = ctxt->input->cur[0];
10609 next = ctxt->input->cur[1];
10610 if ((cur == '<') && (next == '?')) {
10611 if ((!terminate) &&
10612 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10613 goto done;
10614#ifdef DEBUG_PUSH
10615 xmlGenericError(xmlGenericErrorContext,
10616 "PP: Parsing PI\n");
10617#endif
10618 xmlParsePI(ctxt);
10619 ctxt->instate = XML_PARSER_EPILOG;
10620 } else if ((cur == '<') && (next == '!') &&
10621 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10622 if ((!terminate) &&
10623 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10624 goto done;
10625#ifdef DEBUG_PUSH
10626 xmlGenericError(xmlGenericErrorContext,
10627 "PP: Parsing Comment\n");
10628#endif
10629 xmlParseComment(ctxt);
10630 ctxt->instate = XML_PARSER_EPILOG;
10631 } else if ((cur == '<') && (next == '!') &&
10632 (avail < 4)) {
10633 goto done;
10634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010635 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010636 ctxt->instate = XML_PARSER_EOF;
10637#ifdef DEBUG_PUSH
10638 xmlGenericError(xmlGenericErrorContext,
10639 "PP: entering EOF\n");
10640#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010641 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010642 ctxt->sax->endDocument(ctxt->userData);
10643 goto done;
10644 }
10645 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010646 case XML_PARSER_DTD: {
10647 /*
10648 * Sorry but progressive parsing of the internal subset
10649 * is not expected to be supported. We first check that
10650 * the full content of the internal subset is available and
10651 * the parsing is launched only at that point.
10652 * Internal subset ends up with "']' S? '>'" in an unescaped
10653 * section and not in a ']]>' sequence which are conditional
10654 * sections (whoever argued to keep that crap in XML deserve
10655 * a place in hell !).
10656 */
10657 int base, i;
10658 xmlChar *buf;
10659 xmlChar quote = 0;
10660
10661 base = ctxt->input->cur - ctxt->input->base;
10662 if (base < 0) return(0);
10663 if (ctxt->checkIndex > base)
10664 base = ctxt->checkIndex;
10665 buf = ctxt->input->buf->buffer->content;
10666 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10667 base++) {
10668 if (quote != 0) {
10669 if (buf[base] == quote)
10670 quote = 0;
10671 continue;
10672 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010673 if ((quote == 0) && (buf[base] == '<')) {
10674 int found = 0;
10675 /* special handling of comments */
10676 if (((unsigned int) base + 4 <
10677 ctxt->input->buf->buffer->use) &&
10678 (buf[base + 1] == '!') &&
10679 (buf[base + 2] == '-') &&
10680 (buf[base + 3] == '-')) {
10681 for (;(unsigned int) base + 3 <
10682 ctxt->input->buf->buffer->use; base++) {
10683 if ((buf[base] == '-') &&
10684 (buf[base + 1] == '-') &&
10685 (buf[base + 2] == '>')) {
10686 found = 1;
10687 base += 2;
10688 break;
10689 }
10690 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010691 if (!found) {
10692#if 0
10693 fprintf(stderr, "unfinished comment\n");
10694#endif
10695 break; /* for */
10696 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010697 continue;
10698 }
10699 }
Owen Taylor3473f882001-02-23 17:55:21 +000010700 if (buf[base] == '"') {
10701 quote = '"';
10702 continue;
10703 }
10704 if (buf[base] == '\'') {
10705 quote = '\'';
10706 continue;
10707 }
10708 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010709#if 0
10710 fprintf(stderr, "%c%c%c%c: ", buf[base],
10711 buf[base + 1], buf[base + 2], buf[base + 3]);
10712#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010713 if ((unsigned int) base +1 >=
10714 ctxt->input->buf->buffer->use)
10715 break;
10716 if (buf[base + 1] == ']') {
10717 /* conditional crap, skip both ']' ! */
10718 base++;
10719 continue;
10720 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010721 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010722 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10723 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010724 if (buf[base + i] == '>') {
10725#if 0
10726 fprintf(stderr, "found\n");
10727#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010728 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010729 }
10730 if (!IS_BLANK_CH(buf[base + i])) {
10731#if 0
10732 fprintf(stderr, "not found\n");
10733#endif
10734 goto not_end_of_int_subset;
10735 }
Owen Taylor3473f882001-02-23 17:55:21 +000010736 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010737#if 0
10738 fprintf(stderr, "end of stream\n");
10739#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010740 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010741
Owen Taylor3473f882001-02-23 17:55:21 +000010742 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010743not_end_of_int_subset:
10744 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010745 }
10746 /*
10747 * We didn't found the end of the Internal subset
10748 */
Owen Taylor3473f882001-02-23 17:55:21 +000010749#ifdef DEBUG_PUSH
10750 if (next == 0)
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: lookup of int subset end filed\n");
10753#endif
10754 goto done;
10755
10756found_end_int_subset:
10757 xmlParseInternalSubset(ctxt);
10758 ctxt->inSubset = 2;
10759 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10760 (ctxt->sax->externalSubset != NULL))
10761 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10762 ctxt->extSubSystem, ctxt->extSubURI);
10763 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010764 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010765 ctxt->instate = XML_PARSER_PROLOG;
10766 ctxt->checkIndex = 0;
10767#ifdef DEBUG_PUSH
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: entering PROLOG\n");
10770#endif
10771 break;
10772 }
10773 case XML_PARSER_COMMENT:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: internal error, state == COMMENT\n");
10776 ctxt->instate = XML_PARSER_CONTENT;
10777#ifdef DEBUG_PUSH
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: entering CONTENT\n");
10780#endif
10781 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010782 case XML_PARSER_IGNORE:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: internal error, state == IGNORE");
10785 ctxt->instate = XML_PARSER_DTD;
10786#ifdef DEBUG_PUSH
10787 xmlGenericError(xmlGenericErrorContext,
10788 "PP: entering DTD\n");
10789#endif
10790 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010791 case XML_PARSER_PI:
10792 xmlGenericError(xmlGenericErrorContext,
10793 "PP: internal error, state == PI\n");
10794 ctxt->instate = XML_PARSER_CONTENT;
10795#ifdef DEBUG_PUSH
10796 xmlGenericError(xmlGenericErrorContext,
10797 "PP: entering CONTENT\n");
10798#endif
10799 break;
10800 case XML_PARSER_ENTITY_DECL:
10801 xmlGenericError(xmlGenericErrorContext,
10802 "PP: internal error, state == ENTITY_DECL\n");
10803 ctxt->instate = XML_PARSER_DTD;
10804#ifdef DEBUG_PUSH
10805 xmlGenericError(xmlGenericErrorContext,
10806 "PP: entering DTD\n");
10807#endif
10808 break;
10809 case XML_PARSER_ENTITY_VALUE:
10810 xmlGenericError(xmlGenericErrorContext,
10811 "PP: internal error, state == ENTITY_VALUE\n");
10812 ctxt->instate = XML_PARSER_CONTENT;
10813#ifdef DEBUG_PUSH
10814 xmlGenericError(xmlGenericErrorContext,
10815 "PP: entering DTD\n");
10816#endif
10817 break;
10818 case XML_PARSER_ATTRIBUTE_VALUE:
10819 xmlGenericError(xmlGenericErrorContext,
10820 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10821 ctxt->instate = XML_PARSER_START_TAG;
10822#ifdef DEBUG_PUSH
10823 xmlGenericError(xmlGenericErrorContext,
10824 "PP: entering START_TAG\n");
10825#endif
10826 break;
10827 case XML_PARSER_SYSTEM_LITERAL:
10828 xmlGenericError(xmlGenericErrorContext,
10829 "PP: internal error, state == SYSTEM_LITERAL\n");
10830 ctxt->instate = XML_PARSER_START_TAG;
10831#ifdef DEBUG_PUSH
10832 xmlGenericError(xmlGenericErrorContext,
10833 "PP: entering START_TAG\n");
10834#endif
10835 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010836 case XML_PARSER_PUBLIC_LITERAL:
10837 xmlGenericError(xmlGenericErrorContext,
10838 "PP: internal error, state == PUBLIC_LITERAL\n");
10839 ctxt->instate = XML_PARSER_START_TAG;
10840#ifdef DEBUG_PUSH
10841 xmlGenericError(xmlGenericErrorContext,
10842 "PP: entering START_TAG\n");
10843#endif
10844 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010845 }
10846 }
10847done:
10848#ifdef DEBUG_PUSH
10849 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10850#endif
10851 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010852encoding_error:
10853 {
10854 char buffer[150];
10855
10856 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10857 ctxt->input->cur[0], ctxt->input->cur[1],
10858 ctxt->input->cur[2], ctxt->input->cur[3]);
10859 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10860 "Input is not proper UTF-8, indicate encoding !\n%s",
10861 BAD_CAST buffer, NULL);
10862 }
10863 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010864}
10865
10866/**
Owen Taylor3473f882001-02-23 17:55:21 +000010867 * xmlParseChunk:
10868 * @ctxt: an XML parser context
10869 * @chunk: an char array
10870 * @size: the size in byte of the chunk
10871 * @terminate: last chunk indicator
10872 *
10873 * Parse a Chunk of memory
10874 *
10875 * Returns zero if no error, the xmlParserErrors otherwise.
10876 */
10877int
10878xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10879 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010880 int end_in_lf = 0;
10881
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010882 if (ctxt == NULL)
10883 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010884 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010885 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010886 if (ctxt->instate == XML_PARSER_START)
10887 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010888 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10889 (chunk[size - 1] == '\r')) {
10890 end_in_lf = 1;
10891 size--;
10892 }
Owen Taylor3473f882001-02-23 17:55:21 +000010893 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10894 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10895 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10896 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010897 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010898
William M. Bracka3215c72004-07-31 16:24:01 +000010899 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10900 if (res < 0) {
10901 ctxt->errNo = XML_PARSER_EOF;
10902 ctxt->disableSAX = 1;
10903 return (XML_PARSER_EOF);
10904 }
Owen Taylor3473f882001-02-23 17:55:21 +000010905 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10906 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010907 ctxt->input->end =
10908 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010909#ifdef DEBUG_PUSH
10910 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10911#endif
10912
Owen Taylor3473f882001-02-23 17:55:21 +000010913 } else if (ctxt->instate != XML_PARSER_EOF) {
10914 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10915 xmlParserInputBufferPtr in = ctxt->input->buf;
10916 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10917 (in->raw != NULL)) {
10918 int nbchars;
10919
10920 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10921 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010922 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010923 xmlGenericError(xmlGenericErrorContext,
10924 "xmlParseChunk: encoder error\n");
10925 return(XML_ERR_INVALID_ENCODING);
10926 }
10927 }
10928 }
10929 }
10930 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010931 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10932 (ctxt->input->buf != NULL)) {
10933 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10934 }
Daniel Veillard14412512005-01-21 23:53:26 +000010935 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010936 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010937 if (terminate) {
10938 /*
10939 * Check for termination
10940 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010941 int avail = 0;
10942
10943 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010944 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010945 avail = ctxt->input->length -
10946 (ctxt->input->cur - ctxt->input->base);
10947 else
10948 avail = ctxt->input->buf->buffer->use -
10949 (ctxt->input->cur - ctxt->input->base);
10950 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010951
Owen Taylor3473f882001-02-23 17:55:21 +000010952 if ((ctxt->instate != XML_PARSER_EOF) &&
10953 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010954 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010955 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010956 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010957 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010958 }
Owen Taylor3473f882001-02-23 17:55:21 +000010959 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010960 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010961 ctxt->sax->endDocument(ctxt->userData);
10962 }
10963 ctxt->instate = XML_PARSER_EOF;
10964 }
10965 return((xmlParserErrors) ctxt->errNo);
10966}
10967
10968/************************************************************************
10969 * *
10970 * I/O front end functions to the parser *
10971 * *
10972 ************************************************************************/
10973
10974/**
Owen Taylor3473f882001-02-23 17:55:21 +000010975 * xmlCreatePushParserCtxt:
10976 * @sax: a SAX handler
10977 * @user_data: The user data returned on SAX callbacks
10978 * @chunk: a pointer to an array of chars
10979 * @size: number of chars in the array
10980 * @filename: an optional file name or URI
10981 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010982 * Create a parser context for using the XML parser in push mode.
10983 * If @buffer and @size are non-NULL, the data is used to detect
10984 * the encoding. The remaining characters will be parsed so they
10985 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010986 * To allow content encoding detection, @size should be >= 4
10987 * The value of @filename is used for fetching external entities
10988 * and error/warning reports.
10989 *
10990 * Returns the new parser context or NULL
10991 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010992
Owen Taylor3473f882001-02-23 17:55:21 +000010993xmlParserCtxtPtr
10994xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10995 const char *chunk, int size, const char *filename) {
10996 xmlParserCtxtPtr ctxt;
10997 xmlParserInputPtr inputStream;
10998 xmlParserInputBufferPtr buf;
10999 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11000
11001 /*
11002 * plug some encoding conversion routines
11003 */
11004 if ((chunk != NULL) && (size >= 4))
11005 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11006
11007 buf = xmlAllocParserInputBuffer(enc);
11008 if (buf == NULL) return(NULL);
11009
11010 ctxt = xmlNewParserCtxt();
11011 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011012 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011013 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011014 return(NULL);
11015 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011016 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011017 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11018 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011019 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011020 xmlFreeParserInputBuffer(buf);
11021 xmlFreeParserCtxt(ctxt);
11022 return(NULL);
11023 }
Owen Taylor3473f882001-02-23 17:55:21 +000011024 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011025#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011026 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011027#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011028 xmlFree(ctxt->sax);
11029 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11030 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011031 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011032 xmlFreeParserInputBuffer(buf);
11033 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011034 return(NULL);
11035 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011036 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11037 if (sax->initialized == XML_SAX2_MAGIC)
11038 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11039 else
11040 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011041 if (user_data != NULL)
11042 ctxt->userData = user_data;
11043 }
11044 if (filename == NULL) {
11045 ctxt->directory = NULL;
11046 } else {
11047 ctxt->directory = xmlParserGetDirectory(filename);
11048 }
11049
11050 inputStream = xmlNewInputStream(ctxt);
11051 if (inputStream == NULL) {
11052 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011053 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011054 return(NULL);
11055 }
11056
11057 if (filename == NULL)
11058 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011059 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011060 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011061 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011062 if (inputStream->filename == NULL) {
11063 xmlFreeParserCtxt(ctxt);
11064 xmlFreeParserInputBuffer(buf);
11065 return(NULL);
11066 }
11067 }
Owen Taylor3473f882001-02-23 17:55:21 +000011068 inputStream->buf = buf;
11069 inputStream->base = inputStream->buf->buffer->content;
11070 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011071 inputStream->end =
11072 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011073
11074 inputPush(ctxt, inputStream);
11075
William M. Brack3a1cd212005-02-11 14:35:54 +000011076 /*
11077 * If the caller didn't provide an initial 'chunk' for determining
11078 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11079 * that it can be automatically determined later
11080 */
11081 if ((size == 0) || (chunk == NULL)) {
11082 ctxt->charset = XML_CHAR_ENCODING_NONE;
11083 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011084 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11085 int cur = ctxt->input->cur - ctxt->input->base;
11086
Owen Taylor3473f882001-02-23 17:55:21 +000011087 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011088
11089 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11090 ctxt->input->cur = ctxt->input->base + cur;
11091 ctxt->input->end =
11092 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011093#ifdef DEBUG_PUSH
11094 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11095#endif
11096 }
11097
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011098 if (enc != XML_CHAR_ENCODING_NONE) {
11099 xmlSwitchEncoding(ctxt, enc);
11100 }
11101
Owen Taylor3473f882001-02-23 17:55:21 +000011102 return(ctxt);
11103}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011104#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011105
11106/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011107 * xmlStopParser:
11108 * @ctxt: an XML parser context
11109 *
11110 * Blocks further parser processing
11111 */
11112void
11113xmlStopParser(xmlParserCtxtPtr ctxt) {
11114 if (ctxt == NULL)
11115 return;
11116 ctxt->instate = XML_PARSER_EOF;
11117 ctxt->disableSAX = 1;
11118 if (ctxt->input != NULL) {
11119 ctxt->input->cur = BAD_CAST"";
11120 ctxt->input->base = ctxt->input->cur;
11121 }
11122}
11123
11124/**
Owen Taylor3473f882001-02-23 17:55:21 +000011125 * xmlCreateIOParserCtxt:
11126 * @sax: a SAX handler
11127 * @user_data: The user data returned on SAX callbacks
11128 * @ioread: an I/O read function
11129 * @ioclose: an I/O close function
11130 * @ioctx: an I/O handler
11131 * @enc: the charset encoding if known
11132 *
11133 * Create a parser context for using the XML parser with an existing
11134 * I/O stream
11135 *
11136 * Returns the new parser context or NULL
11137 */
11138xmlParserCtxtPtr
11139xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11140 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11141 void *ioctx, xmlCharEncoding enc) {
11142 xmlParserCtxtPtr ctxt;
11143 xmlParserInputPtr inputStream;
11144 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011145
11146 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011147
11148 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11149 if (buf == NULL) return(NULL);
11150
11151 ctxt = xmlNewParserCtxt();
11152 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011153 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011154 return(NULL);
11155 }
11156 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011157#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011158 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011159#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011160 xmlFree(ctxt->sax);
11161 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11162 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011163 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011164 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011165 return(NULL);
11166 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011167 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11168 if (sax->initialized == XML_SAX2_MAGIC)
11169 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11170 else
11171 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011172 if (user_data != NULL)
11173 ctxt->userData = user_data;
11174 }
11175
11176 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11177 if (inputStream == NULL) {
11178 xmlFreeParserCtxt(ctxt);
11179 return(NULL);
11180 }
11181 inputPush(ctxt, inputStream);
11182
11183 return(ctxt);
11184}
11185
Daniel Veillard4432df22003-09-28 18:58:27 +000011186#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011187/************************************************************************
11188 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011189 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011190 * *
11191 ************************************************************************/
11192
11193/**
11194 * xmlIOParseDTD:
11195 * @sax: the SAX handler block or NULL
11196 * @input: an Input Buffer
11197 * @enc: the charset encoding if known
11198 *
11199 * Load and parse a DTD
11200 *
11201 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011202 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011203 */
11204
11205xmlDtdPtr
11206xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11207 xmlCharEncoding enc) {
11208 xmlDtdPtr ret = NULL;
11209 xmlParserCtxtPtr ctxt;
11210 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011211 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011212
11213 if (input == NULL)
11214 return(NULL);
11215
11216 ctxt = xmlNewParserCtxt();
11217 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011218 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011219 return(NULL);
11220 }
11221
11222 /*
11223 * Set-up the SAX context
11224 */
11225 if (sax != NULL) {
11226 if (ctxt->sax != NULL)
11227 xmlFree(ctxt->sax);
11228 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011229 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011230 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011231 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011232
11233 /*
11234 * generate a parser input from the I/O handler
11235 */
11236
Daniel Veillard43caefb2003-12-07 19:32:22 +000011237 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011238 if (pinput == NULL) {
11239 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011240 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011241 xmlFreeParserCtxt(ctxt);
11242 return(NULL);
11243 }
11244
11245 /*
11246 * plug some encoding conversion routines here.
11247 */
11248 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011249 if (enc != XML_CHAR_ENCODING_NONE) {
11250 xmlSwitchEncoding(ctxt, enc);
11251 }
Owen Taylor3473f882001-02-23 17:55:21 +000011252
11253 pinput->filename = NULL;
11254 pinput->line = 1;
11255 pinput->col = 1;
11256 pinput->base = ctxt->input->cur;
11257 pinput->cur = ctxt->input->cur;
11258 pinput->free = NULL;
11259
11260 /*
11261 * let's parse that entity knowing it's an external subset.
11262 */
11263 ctxt->inSubset = 2;
11264 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11265 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11266 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011267
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011268 if ((enc == XML_CHAR_ENCODING_NONE) &&
11269 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011270 /*
11271 * Get the 4 first bytes and decode the charset
11272 * if enc != XML_CHAR_ENCODING_NONE
11273 * plug some encoding conversion routines.
11274 */
11275 start[0] = RAW;
11276 start[1] = NXT(1);
11277 start[2] = NXT(2);
11278 start[3] = NXT(3);
11279 enc = xmlDetectCharEncoding(start, 4);
11280 if (enc != XML_CHAR_ENCODING_NONE) {
11281 xmlSwitchEncoding(ctxt, enc);
11282 }
11283 }
11284
Owen Taylor3473f882001-02-23 17:55:21 +000011285 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11286
11287 if (ctxt->myDoc != NULL) {
11288 if (ctxt->wellFormed) {
11289 ret = ctxt->myDoc->extSubset;
11290 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011291 if (ret != NULL) {
11292 xmlNodePtr tmp;
11293
11294 ret->doc = NULL;
11295 tmp = ret->children;
11296 while (tmp != NULL) {
11297 tmp->doc = NULL;
11298 tmp = tmp->next;
11299 }
11300 }
Owen Taylor3473f882001-02-23 17:55:21 +000011301 } else {
11302 ret = NULL;
11303 }
11304 xmlFreeDoc(ctxt->myDoc);
11305 ctxt->myDoc = NULL;
11306 }
11307 if (sax != NULL) ctxt->sax = NULL;
11308 xmlFreeParserCtxt(ctxt);
11309
11310 return(ret);
11311}
11312
11313/**
11314 * xmlSAXParseDTD:
11315 * @sax: the SAX handler block
11316 * @ExternalID: a NAME* containing the External ID of the DTD
11317 * @SystemID: a NAME* containing the URL to the DTD
11318 *
11319 * Load and parse an external subset.
11320 *
11321 * Returns the resulting xmlDtdPtr or NULL in case of error.
11322 */
11323
11324xmlDtdPtr
11325xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11326 const xmlChar *SystemID) {
11327 xmlDtdPtr ret = NULL;
11328 xmlParserCtxtPtr ctxt;
11329 xmlParserInputPtr input = NULL;
11330 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011331 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011332
11333 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11334
11335 ctxt = xmlNewParserCtxt();
11336 if (ctxt == NULL) {
11337 return(NULL);
11338 }
11339
11340 /*
11341 * Set-up the SAX context
11342 */
11343 if (sax != NULL) {
11344 if (ctxt->sax != NULL)
11345 xmlFree(ctxt->sax);
11346 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011347 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011348 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011349
11350 /*
11351 * Canonicalise the system ID
11352 */
11353 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011354 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011355 xmlFreeParserCtxt(ctxt);
11356 return(NULL);
11357 }
Owen Taylor3473f882001-02-23 17:55:21 +000011358
11359 /*
11360 * Ask the Entity resolver to load the damn thing
11361 */
11362
11363 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011364 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11365 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011366 if (input == NULL) {
11367 if (sax != NULL) ctxt->sax = NULL;
11368 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011369 if (systemIdCanonic != NULL)
11370 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011371 return(NULL);
11372 }
11373
11374 /*
11375 * plug some encoding conversion routines here.
11376 */
11377 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011378 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11379 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11380 xmlSwitchEncoding(ctxt, enc);
11381 }
Owen Taylor3473f882001-02-23 17:55:21 +000011382
11383 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011384 input->filename = (char *) systemIdCanonic;
11385 else
11386 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011387 input->line = 1;
11388 input->col = 1;
11389 input->base = ctxt->input->cur;
11390 input->cur = ctxt->input->cur;
11391 input->free = NULL;
11392
11393 /*
11394 * let's parse that entity knowing it's an external subset.
11395 */
11396 ctxt->inSubset = 2;
11397 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11398 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11399 ExternalID, SystemID);
11400 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11401
11402 if (ctxt->myDoc != NULL) {
11403 if (ctxt->wellFormed) {
11404 ret = ctxt->myDoc->extSubset;
11405 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011406 if (ret != NULL) {
11407 xmlNodePtr tmp;
11408
11409 ret->doc = NULL;
11410 tmp = ret->children;
11411 while (tmp != NULL) {
11412 tmp->doc = NULL;
11413 tmp = tmp->next;
11414 }
11415 }
Owen Taylor3473f882001-02-23 17:55:21 +000011416 } else {
11417 ret = NULL;
11418 }
11419 xmlFreeDoc(ctxt->myDoc);
11420 ctxt->myDoc = NULL;
11421 }
11422 if (sax != NULL) ctxt->sax = NULL;
11423 xmlFreeParserCtxt(ctxt);
11424
11425 return(ret);
11426}
11427
Daniel Veillard4432df22003-09-28 18:58:27 +000011428
Owen Taylor3473f882001-02-23 17:55:21 +000011429/**
11430 * xmlParseDTD:
11431 * @ExternalID: a NAME* containing the External ID of the DTD
11432 * @SystemID: a NAME* containing the URL to the DTD
11433 *
11434 * Load and parse an external subset.
11435 *
11436 * Returns the resulting xmlDtdPtr or NULL in case of error.
11437 */
11438
11439xmlDtdPtr
11440xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11441 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11442}
Daniel Veillard4432df22003-09-28 18:58:27 +000011443#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011444
11445/************************************************************************
11446 * *
11447 * Front ends when parsing an Entity *
11448 * *
11449 ************************************************************************/
11450
11451/**
Owen Taylor3473f882001-02-23 17:55:21 +000011452 * xmlParseCtxtExternalEntity:
11453 * @ctx: the existing parsing context
11454 * @URL: the URL for the entity to load
11455 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011456 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011457 *
11458 * Parse an external general entity within an existing parsing context
11459 * An external general parsed entity is well-formed if it matches the
11460 * production labeled extParsedEnt.
11461 *
11462 * [78] extParsedEnt ::= TextDecl? content
11463 *
11464 * Returns 0 if the entity is well formed, -1 in case of args problem and
11465 * the parser error code otherwise
11466 */
11467
11468int
11469xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011470 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011471 xmlParserCtxtPtr ctxt;
11472 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011473 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011474 xmlSAXHandlerPtr oldsax = NULL;
11475 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011476 xmlChar start[4];
11477 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011478 xmlParserInputPtr inputStream;
11479 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011480
Daniel Veillardce682bc2004-11-05 17:22:25 +000011481 if (ctx == NULL) return(-1);
11482
Owen Taylor3473f882001-02-23 17:55:21 +000011483 if (ctx->depth > 40) {
11484 return(XML_ERR_ENTITY_LOOP);
11485 }
11486
Daniel Veillardcda96922001-08-21 10:56:31 +000011487 if (lst != NULL)
11488 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011489 if ((URL == NULL) && (ID == NULL))
11490 return(-1);
11491 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11492 return(-1);
11493
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011494 ctxt = xmlNewParserCtxt();
11495 if (ctxt == NULL) {
11496 return(-1);
11497 }
11498
Owen Taylor3473f882001-02-23 17:55:21 +000011499 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011500 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011501
11502 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11503 if (inputStream == NULL) {
11504 xmlFreeParserCtxt(ctxt);
11505 return(-1);
11506 }
11507
11508 inputPush(ctxt, inputStream);
11509
11510 if ((ctxt->directory == NULL) && (directory == NULL))
11511 directory = xmlParserGetDirectory((char *)URL);
11512 if ((ctxt->directory == NULL) && (directory != NULL))
11513 ctxt->directory = directory;
11514
Owen Taylor3473f882001-02-23 17:55:21 +000011515 oldsax = ctxt->sax;
11516 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011517 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011518 newDoc = xmlNewDoc(BAD_CAST "1.0");
11519 if (newDoc == NULL) {
11520 xmlFreeParserCtxt(ctxt);
11521 return(-1);
11522 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011523 if (ctx->myDoc->dict) {
11524 newDoc->dict = ctx->myDoc->dict;
11525 xmlDictReference(newDoc->dict);
11526 }
Owen Taylor3473f882001-02-23 17:55:21 +000011527 if (ctx->myDoc != NULL) {
11528 newDoc->intSubset = ctx->myDoc->intSubset;
11529 newDoc->extSubset = ctx->myDoc->extSubset;
11530 }
11531 if (ctx->myDoc->URL != NULL) {
11532 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11533 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011534 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11535 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011536 ctxt->sax = oldsax;
11537 xmlFreeParserCtxt(ctxt);
11538 newDoc->intSubset = NULL;
11539 newDoc->extSubset = NULL;
11540 xmlFreeDoc(newDoc);
11541 return(-1);
11542 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011543 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011544 nodePush(ctxt, newDoc->children);
11545 if (ctx->myDoc == NULL) {
11546 ctxt->myDoc = newDoc;
11547 } else {
11548 ctxt->myDoc = ctx->myDoc;
11549 newDoc->children->doc = ctx->myDoc;
11550 }
11551
Daniel Veillard87a764e2001-06-20 17:41:10 +000011552 /*
11553 * Get the 4 first bytes and decode the charset
11554 * if enc != XML_CHAR_ENCODING_NONE
11555 * plug some encoding conversion routines.
11556 */
11557 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011558 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11559 start[0] = RAW;
11560 start[1] = NXT(1);
11561 start[2] = NXT(2);
11562 start[3] = NXT(3);
11563 enc = xmlDetectCharEncoding(start, 4);
11564 if (enc != XML_CHAR_ENCODING_NONE) {
11565 xmlSwitchEncoding(ctxt, enc);
11566 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011567 }
11568
Owen Taylor3473f882001-02-23 17:55:21 +000011569 /*
11570 * Parse a possible text declaration first
11571 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011572 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011573 xmlParseTextDecl(ctxt);
11574 }
11575
11576 /*
11577 * Doing validity checking on chunk doesn't make sense
11578 */
11579 ctxt->instate = XML_PARSER_CONTENT;
11580 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011581 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011582 ctxt->loadsubset = ctx->loadsubset;
11583 ctxt->depth = ctx->depth + 1;
11584 ctxt->replaceEntities = ctx->replaceEntities;
11585 if (ctxt->validate) {
11586 ctxt->vctxt.error = ctx->vctxt.error;
11587 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011588 } else {
11589 ctxt->vctxt.error = NULL;
11590 ctxt->vctxt.warning = NULL;
11591 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011592 ctxt->vctxt.nodeTab = NULL;
11593 ctxt->vctxt.nodeNr = 0;
11594 ctxt->vctxt.nodeMax = 0;
11595 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011596 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11597 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011598 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11599 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11600 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011601 ctxt->dictNames = ctx->dictNames;
11602 ctxt->attsDefault = ctx->attsDefault;
11603 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011604 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011605
11606 xmlParseContent(ctxt);
11607
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011608 ctx->validate = ctxt->validate;
11609 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011610 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011611 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011612 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011613 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011614 }
11615 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011616 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011617 }
11618
11619 if (!ctxt->wellFormed) {
11620 if (ctxt->errNo == 0)
11621 ret = 1;
11622 else
11623 ret = ctxt->errNo;
11624 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011625 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011626 xmlNodePtr cur;
11627
11628 /*
11629 * Return the newly created nodeset after unlinking it from
11630 * they pseudo parent.
11631 */
11632 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011633 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011634 while (cur != NULL) {
11635 cur->parent = NULL;
11636 cur = cur->next;
11637 }
11638 newDoc->children->children = NULL;
11639 }
11640 ret = 0;
11641 }
11642 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011643 ctxt->dict = NULL;
11644 ctxt->attsDefault = NULL;
11645 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011646 xmlFreeParserCtxt(ctxt);
11647 newDoc->intSubset = NULL;
11648 newDoc->extSubset = NULL;
11649 xmlFreeDoc(newDoc);
11650
11651 return(ret);
11652}
11653
11654/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011655 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011656 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011657 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011658 * @sax: the SAX handler bloc (possibly NULL)
11659 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11660 * @depth: Used for loop detection, use 0
11661 * @URL: the URL for the entity to load
11662 * @ID: the System ID for the entity to load
11663 * @list: the return value for the set of parsed nodes
11664 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011665 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011666 *
11667 * Returns 0 if the entity is well formed, -1 in case of args problem and
11668 * the parser error code otherwise
11669 */
11670
Daniel Veillard7d515752003-09-26 19:12:37 +000011671static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011672xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11673 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011674 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011675 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011676 xmlParserCtxtPtr ctxt;
11677 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011678 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011679 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011680 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011681 xmlChar start[4];
11682 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011683
11684 if (depth > 40) {
11685 return(XML_ERR_ENTITY_LOOP);
11686 }
11687
11688
11689
11690 if (list != NULL)
11691 *list = NULL;
11692 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011693 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011694 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011695 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011696
11697
11698 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011699 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011700 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011701 if (oldctxt != NULL) {
11702 ctxt->_private = oldctxt->_private;
11703 ctxt->loadsubset = oldctxt->loadsubset;
11704 ctxt->validate = oldctxt->validate;
11705 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011706 ctxt->record_info = oldctxt->record_info;
11707 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11708 ctxt->node_seq.length = oldctxt->node_seq.length;
11709 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011710 } else {
11711 /*
11712 * Doing validity checking on chunk without context
11713 * doesn't make sense
11714 */
11715 ctxt->_private = NULL;
11716 ctxt->validate = 0;
11717 ctxt->external = 2;
11718 ctxt->loadsubset = 0;
11719 }
Owen Taylor3473f882001-02-23 17:55:21 +000011720 if (sax != NULL) {
11721 oldsax = ctxt->sax;
11722 ctxt->sax = sax;
11723 if (user_data != NULL)
11724 ctxt->userData = user_data;
11725 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011726 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011727 newDoc = xmlNewDoc(BAD_CAST "1.0");
11728 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011729 ctxt->node_seq.maximum = 0;
11730 ctxt->node_seq.length = 0;
11731 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011732 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011733 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011734 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011735 newDoc->intSubset = doc->intSubset;
11736 newDoc->extSubset = doc->extSubset;
11737 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011738 xmlDictReference(newDoc->dict);
11739
Owen Taylor3473f882001-02-23 17:55:21 +000011740 if (doc->URL != NULL) {
11741 newDoc->URL = xmlStrdup(doc->URL);
11742 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011743 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11744 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011745 if (sax != NULL)
11746 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011747 ctxt->node_seq.maximum = 0;
11748 ctxt->node_seq.length = 0;
11749 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011750 xmlFreeParserCtxt(ctxt);
11751 newDoc->intSubset = NULL;
11752 newDoc->extSubset = NULL;
11753 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011754 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011755 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011756 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011757 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011758 ctxt->myDoc = doc;
11759 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011760
Daniel Veillard87a764e2001-06-20 17:41:10 +000011761 /*
11762 * Get the 4 first bytes and decode the charset
11763 * if enc != XML_CHAR_ENCODING_NONE
11764 * plug some encoding conversion routines.
11765 */
11766 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011767 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11768 start[0] = RAW;
11769 start[1] = NXT(1);
11770 start[2] = NXT(2);
11771 start[3] = NXT(3);
11772 enc = xmlDetectCharEncoding(start, 4);
11773 if (enc != XML_CHAR_ENCODING_NONE) {
11774 xmlSwitchEncoding(ctxt, enc);
11775 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011776 }
11777
Owen Taylor3473f882001-02-23 17:55:21 +000011778 /*
11779 * Parse a possible text declaration first
11780 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011781 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011782 xmlParseTextDecl(ctxt);
11783 }
11784
Owen Taylor3473f882001-02-23 17:55:21 +000011785 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011786 ctxt->depth = depth;
11787
11788 xmlParseContent(ctxt);
11789
Daniel Veillard561b7f82002-03-20 21:55:57 +000011790 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011791 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011792 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011793 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011794 }
11795 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011796 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011797 }
11798
11799 if (!ctxt->wellFormed) {
11800 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011801 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011802 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011803 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011804 } else {
11805 if (list != NULL) {
11806 xmlNodePtr cur;
11807
11808 /*
11809 * Return the newly created nodeset after unlinking it from
11810 * they pseudo parent.
11811 */
11812 cur = newDoc->children->children;
11813 *list = cur;
11814 while (cur != NULL) {
11815 cur->parent = NULL;
11816 cur = cur->next;
11817 }
11818 newDoc->children->children = NULL;
11819 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011820 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011821 }
11822 if (sax != NULL)
11823 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011824 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11825 oldctxt->node_seq.length = ctxt->node_seq.length;
11826 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011827 ctxt->node_seq.maximum = 0;
11828 ctxt->node_seq.length = 0;
11829 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011830 xmlFreeParserCtxt(ctxt);
11831 newDoc->intSubset = NULL;
11832 newDoc->extSubset = NULL;
11833 xmlFreeDoc(newDoc);
11834
11835 return(ret);
11836}
11837
Daniel Veillard81273902003-09-30 00:43:48 +000011838#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011839/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011840 * xmlParseExternalEntity:
11841 * @doc: the document the chunk pertains to
11842 * @sax: the SAX handler bloc (possibly NULL)
11843 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11844 * @depth: Used for loop detection, use 0
11845 * @URL: the URL for the entity to load
11846 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011847 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011848 *
11849 * Parse an external general entity
11850 * An external general parsed entity is well-formed if it matches the
11851 * production labeled extParsedEnt.
11852 *
11853 * [78] extParsedEnt ::= TextDecl? content
11854 *
11855 * Returns 0 if the entity is well formed, -1 in case of args problem and
11856 * the parser error code otherwise
11857 */
11858
11859int
11860xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011861 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011862 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011863 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011864}
11865
11866/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011867 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011868 * @doc: the document the chunk pertains to
11869 * @sax: the SAX handler bloc (possibly NULL)
11870 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11871 * @depth: Used for loop detection, use 0
11872 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011873 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011874 *
11875 * Parse a well-balanced chunk of an XML document
11876 * called by the parser
11877 * The allowed sequence for the Well Balanced Chunk is the one defined by
11878 * the content production in the XML grammar:
11879 *
11880 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11881 *
11882 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11883 * the parser error code otherwise
11884 */
11885
11886int
11887xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011888 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011889 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11890 depth, string, lst, 0 );
11891}
Daniel Veillard81273902003-09-30 00:43:48 +000011892#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011893
11894/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011895 * xmlParseBalancedChunkMemoryInternal:
11896 * @oldctxt: the existing parsing context
11897 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11898 * @user_data: the user data field for the parser context
11899 * @lst: the return value for the set of parsed nodes
11900 *
11901 *
11902 * Parse a well-balanced chunk of an XML document
11903 * called by the parser
11904 * The allowed sequence for the Well Balanced Chunk is the one defined by
11905 * the content production in the XML grammar:
11906 *
11907 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11908 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011909 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11910 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011911 *
11912 * In case recover is set to 1, the nodelist will not be empty even if
11913 * the parsed chunk is not well balanced.
11914 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011915static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011916xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11917 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11918 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011919 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011920 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011921 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011922 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011923 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011924 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011925 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011926
11927 if (oldctxt->depth > 40) {
11928 return(XML_ERR_ENTITY_LOOP);
11929 }
11930
11931
11932 if (lst != NULL)
11933 *lst = NULL;
11934 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011935 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011936
11937 size = xmlStrlen(string);
11938
11939 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011940 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011941 if (user_data != NULL)
11942 ctxt->userData = user_data;
11943 else
11944 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011945 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11946 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011947 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11948 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11949 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011950
11951 oldsax = ctxt->sax;
11952 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011953 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011954 ctxt->replaceEntities = oldctxt->replaceEntities;
11955 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011956
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011957 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011958 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011959 newDoc = xmlNewDoc(BAD_CAST "1.0");
11960 if (newDoc == NULL) {
11961 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011962 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011963 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011964 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011965 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011966 newDoc->dict = ctxt->dict;
11967 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011968 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011969 } else {
11970 ctxt->myDoc = oldctxt->myDoc;
11971 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011972 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011973 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011974 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11975 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011976 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011977 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011978 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011979 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011980 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011981 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011982 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011983 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011984 ctxt->myDoc->children = NULL;
11985 ctxt->myDoc->last = NULL;
11986 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011987 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011988 ctxt->instate = XML_PARSER_CONTENT;
11989 ctxt->depth = oldctxt->depth + 1;
11990
Daniel Veillard328f48c2002-11-15 15:24:34 +000011991 ctxt->validate = 0;
11992 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011993 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11994 /*
11995 * ID/IDREF registration will be done in xmlValidateElement below
11996 */
11997 ctxt->loadsubset |= XML_SKIP_IDS;
11998 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011999 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012000 ctxt->attsDefault = oldctxt->attsDefault;
12001 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012002
Daniel Veillard68e9e742002-11-16 15:35:11 +000012003 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012004 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012005 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012006 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012007 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012008 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012009 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012010 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012011 }
12012
12013 if (!ctxt->wellFormed) {
12014 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012015 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012016 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012017 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012018 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012019 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012020 }
12021
William M. Brack7b9154b2003-09-27 19:23:50 +000012022 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012023 xmlNodePtr cur;
12024
12025 /*
12026 * Return the newly created nodeset after unlinking it from
12027 * they pseudo parent.
12028 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012029 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012030 *lst = cur;
12031 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012032#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012033 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12034 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12035 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012036 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12037 oldctxt->myDoc, cur);
12038 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012039#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012040 cur->parent = NULL;
12041 cur = cur->next;
12042 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012043 ctxt->myDoc->children->children = NULL;
12044 }
12045 if (ctxt->myDoc != NULL) {
12046 xmlFreeNode(ctxt->myDoc->children);
12047 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012048 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012049 }
12050
12051 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012052 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012053 ctxt->attsDefault = NULL;
12054 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012055 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012056 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012057 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012058 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012059
12060 return(ret);
12061}
12062
Daniel Veillard29b17482004-08-16 00:39:03 +000012063/**
12064 * xmlParseInNodeContext:
12065 * @node: the context node
12066 * @data: the input string
12067 * @datalen: the input string length in bytes
12068 * @options: a combination of xmlParserOption
12069 * @lst: the return value for the set of parsed nodes
12070 *
12071 * Parse a well-balanced chunk of an XML document
12072 * within the context (DTD, namespaces, etc ...) of the given node.
12073 *
12074 * The allowed sequence for the data is a Well Balanced Chunk defined by
12075 * the content production in the XML grammar:
12076 *
12077 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12078 *
12079 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12080 * error code otherwise
12081 */
12082xmlParserErrors
12083xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12084 int options, xmlNodePtr *lst) {
12085#ifdef SAX2
12086 xmlParserCtxtPtr ctxt;
12087 xmlDocPtr doc = NULL;
12088 xmlNodePtr fake, cur;
12089 int nsnr = 0;
12090
12091 xmlParserErrors ret = XML_ERR_OK;
12092
12093 /*
12094 * check all input parameters, grab the document
12095 */
12096 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12097 return(XML_ERR_INTERNAL_ERROR);
12098 switch (node->type) {
12099 case XML_ELEMENT_NODE:
12100 case XML_ATTRIBUTE_NODE:
12101 case XML_TEXT_NODE:
12102 case XML_CDATA_SECTION_NODE:
12103 case XML_ENTITY_REF_NODE:
12104 case XML_PI_NODE:
12105 case XML_COMMENT_NODE:
12106 case XML_DOCUMENT_NODE:
12107 case XML_HTML_DOCUMENT_NODE:
12108 break;
12109 default:
12110 return(XML_ERR_INTERNAL_ERROR);
12111
12112 }
12113 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12114 (node->type != XML_DOCUMENT_NODE) &&
12115 (node->type != XML_HTML_DOCUMENT_NODE))
12116 node = node->parent;
12117 if (node == NULL)
12118 return(XML_ERR_INTERNAL_ERROR);
12119 if (node->type == XML_ELEMENT_NODE)
12120 doc = node->doc;
12121 else
12122 doc = (xmlDocPtr) node;
12123 if (doc == NULL)
12124 return(XML_ERR_INTERNAL_ERROR);
12125
12126 /*
12127 * allocate a context and set-up everything not related to the
12128 * node position in the tree
12129 */
12130 if (doc->type == XML_DOCUMENT_NODE)
12131 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12132#ifdef LIBXML_HTML_ENABLED
12133 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12134 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12135#endif
12136 else
12137 return(XML_ERR_INTERNAL_ERROR);
12138
12139 if (ctxt == NULL)
12140 return(XML_ERR_NO_MEMORY);
12141 fake = xmlNewComment(NULL);
12142 if (fake == NULL) {
12143 xmlFreeParserCtxt(ctxt);
12144 return(XML_ERR_NO_MEMORY);
12145 }
12146 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012147
12148 /*
12149 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12150 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12151 * we must wait until the last moment to free the original one.
12152 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012153 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012154 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012155 xmlDictFree(ctxt->dict);
12156 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012157 } else
12158 options |= XML_PARSE_NODICT;
12159
12160 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012161 xmlDetectSAX2(ctxt);
12162 ctxt->myDoc = doc;
12163
12164 if (node->type == XML_ELEMENT_NODE) {
12165 nodePush(ctxt, node);
12166 /*
12167 * initialize the SAX2 namespaces stack
12168 */
12169 cur = node;
12170 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12171 xmlNsPtr ns = cur->nsDef;
12172 const xmlChar *iprefix, *ihref;
12173
12174 while (ns != NULL) {
12175 if (ctxt->dict) {
12176 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12177 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12178 } else {
12179 iprefix = ns->prefix;
12180 ihref = ns->href;
12181 }
12182
12183 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12184 nsPush(ctxt, iprefix, ihref);
12185 nsnr++;
12186 }
12187 ns = ns->next;
12188 }
12189 cur = cur->parent;
12190 }
12191 ctxt->instate = XML_PARSER_CONTENT;
12192 }
12193
12194 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12195 /*
12196 * ID/IDREF registration will be done in xmlValidateElement below
12197 */
12198 ctxt->loadsubset |= XML_SKIP_IDS;
12199 }
12200
Daniel Veillard499cc922006-01-18 17:22:35 +000012201#ifdef LIBXML_HTML_ENABLED
12202 if (doc->type == XML_HTML_DOCUMENT_NODE)
12203 __htmlParseContent(ctxt);
12204 else
12205#endif
12206 xmlParseContent(ctxt);
12207
Daniel Veillard29b17482004-08-16 00:39:03 +000012208 nsPop(ctxt, nsnr);
12209 if ((RAW == '<') && (NXT(1) == '/')) {
12210 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12211 } else if (RAW != 0) {
12212 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12213 }
12214 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12215 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12216 ctxt->wellFormed = 0;
12217 }
12218
12219 if (!ctxt->wellFormed) {
12220 if (ctxt->errNo == 0)
12221 ret = XML_ERR_INTERNAL_ERROR;
12222 else
12223 ret = (xmlParserErrors)ctxt->errNo;
12224 } else {
12225 ret = XML_ERR_OK;
12226 }
12227
12228 /*
12229 * Return the newly created nodeset after unlinking it from
12230 * the pseudo sibling.
12231 */
12232
12233 cur = fake->next;
12234 fake->next = NULL;
12235 node->last = fake;
12236
12237 if (cur != NULL) {
12238 cur->prev = NULL;
12239 }
12240
12241 *lst = cur;
12242
12243 while (cur != NULL) {
12244 cur->parent = NULL;
12245 cur = cur->next;
12246 }
12247
12248 xmlUnlinkNode(fake);
12249 xmlFreeNode(fake);
12250
12251
12252 if (ret != XML_ERR_OK) {
12253 xmlFreeNodeList(*lst);
12254 *lst = NULL;
12255 }
William M. Brackc3f81342004-10-03 01:22:44 +000012256
William M. Brackb7b54de2004-10-06 16:38:01 +000012257 if (doc->dict != NULL)
12258 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012259 xmlFreeParserCtxt(ctxt);
12260
12261 return(ret);
12262#else /* !SAX2 */
12263 return(XML_ERR_INTERNAL_ERROR);
12264#endif
12265}
12266
Daniel Veillard81273902003-09-30 00:43:48 +000012267#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012268/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012269 * xmlParseBalancedChunkMemoryRecover:
12270 * @doc: the document the chunk pertains to
12271 * @sax: the SAX handler bloc (possibly NULL)
12272 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12273 * @depth: Used for loop detection, use 0
12274 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12275 * @lst: the return value for the set of parsed nodes
12276 * @recover: return nodes even if the data is broken (use 0)
12277 *
12278 *
12279 * Parse a well-balanced chunk of an XML document
12280 * called by the parser
12281 * The allowed sequence for the Well Balanced Chunk is the one defined by
12282 * the content production in the XML grammar:
12283 *
12284 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12285 *
12286 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12287 * the parser error code otherwise
12288 *
12289 * In case recover is set to 1, the nodelist will not be empty even if
12290 * the parsed chunk is not well balanced.
12291 */
12292int
12293xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12294 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12295 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012296 xmlParserCtxtPtr ctxt;
12297 xmlDocPtr newDoc;
12298 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012299 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012300 int size;
12301 int ret = 0;
12302
12303 if (depth > 40) {
12304 return(XML_ERR_ENTITY_LOOP);
12305 }
12306
12307
Daniel Veillardcda96922001-08-21 10:56:31 +000012308 if (lst != NULL)
12309 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012310 if (string == NULL)
12311 return(-1);
12312
12313 size = xmlStrlen(string);
12314
12315 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12316 if (ctxt == NULL) return(-1);
12317 ctxt->userData = ctxt;
12318 if (sax != NULL) {
12319 oldsax = ctxt->sax;
12320 ctxt->sax = sax;
12321 if (user_data != NULL)
12322 ctxt->userData = user_data;
12323 }
12324 newDoc = xmlNewDoc(BAD_CAST "1.0");
12325 if (newDoc == NULL) {
12326 xmlFreeParserCtxt(ctxt);
12327 return(-1);
12328 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012329 if ((doc != NULL) && (doc->dict != NULL)) {
12330 xmlDictFree(ctxt->dict);
12331 ctxt->dict = doc->dict;
12332 xmlDictReference(ctxt->dict);
12333 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12334 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12335 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12336 ctxt->dictNames = 1;
12337 } else {
12338 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12339 }
Owen Taylor3473f882001-02-23 17:55:21 +000012340 if (doc != NULL) {
12341 newDoc->intSubset = doc->intSubset;
12342 newDoc->extSubset = doc->extSubset;
12343 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012344 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12345 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012346 if (sax != NULL)
12347 ctxt->sax = oldsax;
12348 xmlFreeParserCtxt(ctxt);
12349 newDoc->intSubset = NULL;
12350 newDoc->extSubset = NULL;
12351 xmlFreeDoc(newDoc);
12352 return(-1);
12353 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012354 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12355 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012356 if (doc == NULL) {
12357 ctxt->myDoc = newDoc;
12358 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012359 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012360 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012361 /* Ensure that doc has XML spec namespace */
12362 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12363 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012364 }
12365 ctxt->instate = XML_PARSER_CONTENT;
12366 ctxt->depth = depth;
12367
12368 /*
12369 * Doing validity checking on chunk doesn't make sense
12370 */
12371 ctxt->validate = 0;
12372 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012373 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012374
Daniel Veillardb39bc392002-10-26 19:29:51 +000012375 if ( doc != NULL ){
12376 content = doc->children;
12377 doc->children = NULL;
12378 xmlParseContent(ctxt);
12379 doc->children = content;
12380 }
12381 else {
12382 xmlParseContent(ctxt);
12383 }
Owen Taylor3473f882001-02-23 17:55:21 +000012384 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012385 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012386 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012387 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012388 }
12389 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012390 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012391 }
12392
12393 if (!ctxt->wellFormed) {
12394 if (ctxt->errNo == 0)
12395 ret = 1;
12396 else
12397 ret = ctxt->errNo;
12398 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012399 ret = 0;
12400 }
12401
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012402 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12403 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012404
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012405 /*
12406 * Return the newly created nodeset after unlinking it from
12407 * they pseudo parent.
12408 */
12409 cur = newDoc->children->children;
12410 *lst = cur;
12411 while (cur != NULL) {
12412 xmlSetTreeDoc(cur, doc);
12413 cur->parent = NULL;
12414 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012415 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012416 newDoc->children->children = NULL;
12417 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012418
Owen Taylor3473f882001-02-23 17:55:21 +000012419 if (sax != NULL)
12420 ctxt->sax = oldsax;
12421 xmlFreeParserCtxt(ctxt);
12422 newDoc->intSubset = NULL;
12423 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012424 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012425 xmlFreeDoc(newDoc);
12426
12427 return(ret);
12428}
12429
12430/**
12431 * xmlSAXParseEntity:
12432 * @sax: the SAX handler block
12433 * @filename: the filename
12434 *
12435 * parse an XML external entity out of context and build a tree.
12436 * It use the given SAX function block to handle the parsing callback.
12437 * If sax is NULL, fallback to the default DOM tree building routines.
12438 *
12439 * [78] extParsedEnt ::= TextDecl? content
12440 *
12441 * This correspond to a "Well Balanced" chunk
12442 *
12443 * Returns the resulting document tree
12444 */
12445
12446xmlDocPtr
12447xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12448 xmlDocPtr ret;
12449 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012450
12451 ctxt = xmlCreateFileParserCtxt(filename);
12452 if (ctxt == NULL) {
12453 return(NULL);
12454 }
12455 if (sax != NULL) {
12456 if (ctxt->sax != NULL)
12457 xmlFree(ctxt->sax);
12458 ctxt->sax = sax;
12459 ctxt->userData = NULL;
12460 }
12461
Owen Taylor3473f882001-02-23 17:55:21 +000012462 xmlParseExtParsedEnt(ctxt);
12463
12464 if (ctxt->wellFormed)
12465 ret = ctxt->myDoc;
12466 else {
12467 ret = NULL;
12468 xmlFreeDoc(ctxt->myDoc);
12469 ctxt->myDoc = NULL;
12470 }
12471 if (sax != NULL)
12472 ctxt->sax = NULL;
12473 xmlFreeParserCtxt(ctxt);
12474
12475 return(ret);
12476}
12477
12478/**
12479 * xmlParseEntity:
12480 * @filename: the filename
12481 *
12482 * parse an XML external entity out of context and build a tree.
12483 *
12484 * [78] extParsedEnt ::= TextDecl? content
12485 *
12486 * This correspond to a "Well Balanced" chunk
12487 *
12488 * Returns the resulting document tree
12489 */
12490
12491xmlDocPtr
12492xmlParseEntity(const char *filename) {
12493 return(xmlSAXParseEntity(NULL, filename));
12494}
Daniel Veillard81273902003-09-30 00:43:48 +000012495#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012496
12497/**
12498 * xmlCreateEntityParserCtxt:
12499 * @URL: the entity URL
12500 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012501 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012502 *
12503 * Create a parser context for an external entity
12504 * Automatic support for ZLIB/Compress compressed document is provided
12505 * by default if found at compile-time.
12506 *
12507 * Returns the new parser context or NULL
12508 */
12509xmlParserCtxtPtr
12510xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12511 const xmlChar *base) {
12512 xmlParserCtxtPtr ctxt;
12513 xmlParserInputPtr inputStream;
12514 char *directory = NULL;
12515 xmlChar *uri;
12516
12517 ctxt = xmlNewParserCtxt();
12518 if (ctxt == NULL) {
12519 return(NULL);
12520 }
12521
12522 uri = xmlBuildURI(URL, base);
12523
12524 if (uri == NULL) {
12525 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12526 if (inputStream == NULL) {
12527 xmlFreeParserCtxt(ctxt);
12528 return(NULL);
12529 }
12530
12531 inputPush(ctxt, inputStream);
12532
12533 if ((ctxt->directory == NULL) && (directory == NULL))
12534 directory = xmlParserGetDirectory((char *)URL);
12535 if ((ctxt->directory == NULL) && (directory != NULL))
12536 ctxt->directory = directory;
12537 } else {
12538 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12539 if (inputStream == NULL) {
12540 xmlFree(uri);
12541 xmlFreeParserCtxt(ctxt);
12542 return(NULL);
12543 }
12544
12545 inputPush(ctxt, inputStream);
12546
12547 if ((ctxt->directory == NULL) && (directory == NULL))
12548 directory = xmlParserGetDirectory((char *)uri);
12549 if ((ctxt->directory == NULL) && (directory != NULL))
12550 ctxt->directory = directory;
12551 xmlFree(uri);
12552 }
Owen Taylor3473f882001-02-23 17:55:21 +000012553 return(ctxt);
12554}
12555
12556/************************************************************************
12557 * *
12558 * Front ends when parsing from a file *
12559 * *
12560 ************************************************************************/
12561
12562/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012563 * xmlCreateURLParserCtxt:
12564 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012565 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012566 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012567 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012568 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012569 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012570 *
12571 * Returns the new parser context or NULL
12572 */
12573xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012574xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012575{
12576 xmlParserCtxtPtr ctxt;
12577 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012578 char *directory = NULL;
12579
Owen Taylor3473f882001-02-23 17:55:21 +000012580 ctxt = xmlNewParserCtxt();
12581 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012582 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012583 return(NULL);
12584 }
12585
Daniel Veillarddf292f72005-01-16 19:00:15 +000012586 if (options)
12587 xmlCtxtUseOptions(ctxt, options);
12588 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012589
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012590 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012591 if (inputStream == NULL) {
12592 xmlFreeParserCtxt(ctxt);
12593 return(NULL);
12594 }
12595
Owen Taylor3473f882001-02-23 17:55:21 +000012596 inputPush(ctxt, inputStream);
12597 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012598 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012599 if ((ctxt->directory == NULL) && (directory != NULL))
12600 ctxt->directory = directory;
12601
12602 return(ctxt);
12603}
12604
Daniel Veillard61b93382003-11-03 14:28:31 +000012605/**
12606 * xmlCreateFileParserCtxt:
12607 * @filename: the filename
12608 *
12609 * Create a parser context for a file content.
12610 * Automatic support for ZLIB/Compress compressed document is provided
12611 * by default if found at compile-time.
12612 *
12613 * Returns the new parser context or NULL
12614 */
12615xmlParserCtxtPtr
12616xmlCreateFileParserCtxt(const char *filename)
12617{
12618 return(xmlCreateURLParserCtxt(filename, 0));
12619}
12620
Daniel Veillard81273902003-09-30 00:43:48 +000012621#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012622/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012623 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012624 * @sax: the SAX handler block
12625 * @filename: the filename
12626 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12627 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012628 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012629 *
12630 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12631 * compressed document is provided by default if found at compile-time.
12632 * It use the given SAX function block to handle the parsing callback.
12633 * If sax is NULL, fallback to the default DOM tree building routines.
12634 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012635 * User data (void *) is stored within the parser context in the
12636 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012637 *
Owen Taylor3473f882001-02-23 17:55:21 +000012638 * Returns the resulting document tree
12639 */
12640
12641xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012642xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12643 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012644 xmlDocPtr ret;
12645 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012646
Daniel Veillard635ef722001-10-29 11:48:19 +000012647 xmlInitParser();
12648
Owen Taylor3473f882001-02-23 17:55:21 +000012649 ctxt = xmlCreateFileParserCtxt(filename);
12650 if (ctxt == NULL) {
12651 return(NULL);
12652 }
12653 if (sax != NULL) {
12654 if (ctxt->sax != NULL)
12655 xmlFree(ctxt->sax);
12656 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012657 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012658 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012659 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012660 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012661 }
Owen Taylor3473f882001-02-23 17:55:21 +000012662
Daniel Veillard37d2d162008-03-14 10:54:00 +000012663 if (ctxt->directory == NULL)
12664 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012665
Daniel Veillarddad3f682002-11-17 16:47:27 +000012666 ctxt->recovery = recovery;
12667
Owen Taylor3473f882001-02-23 17:55:21 +000012668 xmlParseDocument(ctxt);
12669
William M. Brackc07329e2003-09-08 01:57:30 +000012670 if ((ctxt->wellFormed) || recovery) {
12671 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012672 if (ret != NULL) {
12673 if (ctxt->input->buf->compressed > 0)
12674 ret->compression = 9;
12675 else
12676 ret->compression = ctxt->input->buf->compressed;
12677 }
William M. Brackc07329e2003-09-08 01:57:30 +000012678 }
Owen Taylor3473f882001-02-23 17:55:21 +000012679 else {
12680 ret = NULL;
12681 xmlFreeDoc(ctxt->myDoc);
12682 ctxt->myDoc = NULL;
12683 }
12684 if (sax != NULL)
12685 ctxt->sax = NULL;
12686 xmlFreeParserCtxt(ctxt);
12687
12688 return(ret);
12689}
12690
12691/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012692 * xmlSAXParseFile:
12693 * @sax: the SAX handler block
12694 * @filename: the filename
12695 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12696 * documents
12697 *
12698 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12699 * compressed document is provided by default if found at compile-time.
12700 * It use the given SAX function block to handle the parsing callback.
12701 * If sax is NULL, fallback to the default DOM tree building routines.
12702 *
12703 * Returns the resulting document tree
12704 */
12705
12706xmlDocPtr
12707xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12708 int recovery) {
12709 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12710}
12711
12712/**
Owen Taylor3473f882001-02-23 17:55:21 +000012713 * xmlRecoverDoc:
12714 * @cur: a pointer to an array of xmlChar
12715 *
12716 * parse an XML in-memory document and build a tree.
12717 * In the case the document is not Well Formed, a tree is built anyway
12718 *
12719 * Returns the resulting document tree
12720 */
12721
12722xmlDocPtr
12723xmlRecoverDoc(xmlChar *cur) {
12724 return(xmlSAXParseDoc(NULL, cur, 1));
12725}
12726
12727/**
12728 * xmlParseFile:
12729 * @filename: the filename
12730 *
12731 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12732 * compressed document is provided by default if found at compile-time.
12733 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012734 * Returns the resulting document tree if the file was wellformed,
12735 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012736 */
12737
12738xmlDocPtr
12739xmlParseFile(const char *filename) {
12740 return(xmlSAXParseFile(NULL, filename, 0));
12741}
12742
12743/**
12744 * xmlRecoverFile:
12745 * @filename: the filename
12746 *
12747 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12748 * compressed document is provided by default if found at compile-time.
12749 * In the case the document is not Well Formed, a tree is built anyway
12750 *
12751 * Returns the resulting document tree
12752 */
12753
12754xmlDocPtr
12755xmlRecoverFile(const char *filename) {
12756 return(xmlSAXParseFile(NULL, filename, 1));
12757}
12758
12759
12760/**
12761 * xmlSetupParserForBuffer:
12762 * @ctxt: an XML parser context
12763 * @buffer: a xmlChar * buffer
12764 * @filename: a file name
12765 *
12766 * Setup the parser context to parse a new buffer; Clears any prior
12767 * contents from the parser context. The buffer parameter must not be
12768 * NULL, but the filename parameter can be
12769 */
12770void
12771xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12772 const char* filename)
12773{
12774 xmlParserInputPtr input;
12775
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012776 if ((ctxt == NULL) || (buffer == NULL))
12777 return;
12778
Owen Taylor3473f882001-02-23 17:55:21 +000012779 input = xmlNewInputStream(ctxt);
12780 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012781 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012782 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012783 return;
12784 }
12785
12786 xmlClearParserCtxt(ctxt);
12787 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012788 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012789 input->base = buffer;
12790 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012791 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012792 inputPush(ctxt, input);
12793}
12794
12795/**
12796 * xmlSAXUserParseFile:
12797 * @sax: a SAX handler
12798 * @user_data: The user data returned on SAX callbacks
12799 * @filename: a file name
12800 *
12801 * parse an XML file and call the given SAX handler routines.
12802 * Automatic support for ZLIB/Compress compressed document is provided
12803 *
12804 * Returns 0 in case of success or a error number otherwise
12805 */
12806int
12807xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12808 const char *filename) {
12809 int ret = 0;
12810 xmlParserCtxtPtr ctxt;
12811
12812 ctxt = xmlCreateFileParserCtxt(filename);
12813 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012814 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012815 xmlFree(ctxt->sax);
12816 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012817 xmlDetectSAX2(ctxt);
12818
Owen Taylor3473f882001-02-23 17:55:21 +000012819 if (user_data != NULL)
12820 ctxt->userData = user_data;
12821
12822 xmlParseDocument(ctxt);
12823
12824 if (ctxt->wellFormed)
12825 ret = 0;
12826 else {
12827 if (ctxt->errNo != 0)
12828 ret = ctxt->errNo;
12829 else
12830 ret = -1;
12831 }
12832 if (sax != NULL)
12833 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012834 if (ctxt->myDoc != NULL) {
12835 xmlFreeDoc(ctxt->myDoc);
12836 ctxt->myDoc = NULL;
12837 }
Owen Taylor3473f882001-02-23 17:55:21 +000012838 xmlFreeParserCtxt(ctxt);
12839
12840 return ret;
12841}
Daniel Veillard81273902003-09-30 00:43:48 +000012842#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012843
12844/************************************************************************
12845 * *
12846 * Front ends when parsing from memory *
12847 * *
12848 ************************************************************************/
12849
12850/**
12851 * xmlCreateMemoryParserCtxt:
12852 * @buffer: a pointer to a char array
12853 * @size: the size of the array
12854 *
12855 * Create a parser context for an XML in-memory document.
12856 *
12857 * Returns the new parser context or NULL
12858 */
12859xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012860xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012861 xmlParserCtxtPtr ctxt;
12862 xmlParserInputPtr input;
12863 xmlParserInputBufferPtr buf;
12864
12865 if (buffer == NULL)
12866 return(NULL);
12867 if (size <= 0)
12868 return(NULL);
12869
12870 ctxt = xmlNewParserCtxt();
12871 if (ctxt == NULL)
12872 return(NULL);
12873
Daniel Veillard53350552003-09-18 13:35:51 +000012874 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012875 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012876 if (buf == NULL) {
12877 xmlFreeParserCtxt(ctxt);
12878 return(NULL);
12879 }
Owen Taylor3473f882001-02-23 17:55:21 +000012880
12881 input = xmlNewInputStream(ctxt);
12882 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012883 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012884 xmlFreeParserCtxt(ctxt);
12885 return(NULL);
12886 }
12887
12888 input->filename = NULL;
12889 input->buf = buf;
12890 input->base = input->buf->buffer->content;
12891 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012892 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012893
12894 inputPush(ctxt, input);
12895 return(ctxt);
12896}
12897
Daniel Veillard81273902003-09-30 00:43:48 +000012898#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012899/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012900 * xmlSAXParseMemoryWithData:
12901 * @sax: the SAX handler block
12902 * @buffer: an pointer to a char array
12903 * @size: the size of the array
12904 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12905 * documents
12906 * @data: the userdata
12907 *
12908 * parse an XML in-memory block and use the given SAX function block
12909 * to handle the parsing callback. If sax is NULL, fallback to the default
12910 * DOM tree building routines.
12911 *
12912 * User data (void *) is stored within the parser context in the
12913 * context's _private member, so it is available nearly everywhere in libxml
12914 *
12915 * Returns the resulting document tree
12916 */
12917
12918xmlDocPtr
12919xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12920 int size, int recovery, void *data) {
12921 xmlDocPtr ret;
12922 xmlParserCtxtPtr ctxt;
12923
12924 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12925 if (ctxt == NULL) return(NULL);
12926 if (sax != NULL) {
12927 if (ctxt->sax != NULL)
12928 xmlFree(ctxt->sax);
12929 ctxt->sax = sax;
12930 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012931 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012932 if (data!=NULL) {
12933 ctxt->_private=data;
12934 }
12935
Daniel Veillardadba5f12003-04-04 16:09:01 +000012936 ctxt->recovery = recovery;
12937
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012938 xmlParseDocument(ctxt);
12939
12940 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12941 else {
12942 ret = NULL;
12943 xmlFreeDoc(ctxt->myDoc);
12944 ctxt->myDoc = NULL;
12945 }
12946 if (sax != NULL)
12947 ctxt->sax = NULL;
12948 xmlFreeParserCtxt(ctxt);
12949
12950 return(ret);
12951}
12952
12953/**
Owen Taylor3473f882001-02-23 17:55:21 +000012954 * xmlSAXParseMemory:
12955 * @sax: the SAX handler block
12956 * @buffer: an pointer to a char array
12957 * @size: the size of the array
12958 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12959 * documents
12960 *
12961 * parse an XML in-memory block and use the given SAX function block
12962 * to handle the parsing callback. If sax is NULL, fallback to the default
12963 * DOM tree building routines.
12964 *
12965 * Returns the resulting document tree
12966 */
12967xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012968xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12969 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012970 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012971}
12972
12973/**
12974 * xmlParseMemory:
12975 * @buffer: an pointer to a char array
12976 * @size: the size of the array
12977 *
12978 * parse an XML in-memory block and build a tree.
12979 *
12980 * Returns the resulting document tree
12981 */
12982
Daniel Veillard50822cb2001-07-26 20:05:51 +000012983xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012984 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12985}
12986
12987/**
12988 * xmlRecoverMemory:
12989 * @buffer: an pointer to a char array
12990 * @size: the size of the array
12991 *
12992 * parse an XML in-memory block and build a tree.
12993 * In the case the document is not Well Formed, a tree is built anyway
12994 *
12995 * Returns the resulting document tree
12996 */
12997
Daniel Veillard50822cb2001-07-26 20:05:51 +000012998xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012999 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13000}
13001
13002/**
13003 * xmlSAXUserParseMemory:
13004 * @sax: a SAX handler
13005 * @user_data: The user data returned on SAX callbacks
13006 * @buffer: an in-memory XML document input
13007 * @size: the length of the XML document in bytes
13008 *
13009 * A better SAX parsing routine.
13010 * parse an XML in-memory buffer and call the given SAX handler routines.
13011 *
13012 * Returns 0 in case of success or a error number otherwise
13013 */
13014int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013015 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013016 int ret = 0;
13017 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013018
13019 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13020 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013021 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13022 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013023 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013024 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013025
Daniel Veillard30211a02001-04-26 09:33:18 +000013026 if (user_data != NULL)
13027 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013028
13029 xmlParseDocument(ctxt);
13030
13031 if (ctxt->wellFormed)
13032 ret = 0;
13033 else {
13034 if (ctxt->errNo != 0)
13035 ret = ctxt->errNo;
13036 else
13037 ret = -1;
13038 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013039 if (sax != NULL)
13040 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013041 if (ctxt->myDoc != NULL) {
13042 xmlFreeDoc(ctxt->myDoc);
13043 ctxt->myDoc = NULL;
13044 }
Owen Taylor3473f882001-02-23 17:55:21 +000013045 xmlFreeParserCtxt(ctxt);
13046
13047 return ret;
13048}
Daniel Veillard81273902003-09-30 00:43:48 +000013049#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013050
13051/**
13052 * xmlCreateDocParserCtxt:
13053 * @cur: a pointer to an array of xmlChar
13054 *
13055 * Creates a parser context for an XML in-memory document.
13056 *
13057 * Returns the new parser context or NULL
13058 */
13059xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013060xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013061 int len;
13062
13063 if (cur == NULL)
13064 return(NULL);
13065 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013066 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013067}
13068
Daniel Veillard81273902003-09-30 00:43:48 +000013069#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013070/**
13071 * xmlSAXParseDoc:
13072 * @sax: the SAX handler block
13073 * @cur: a pointer to an array of xmlChar
13074 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13075 * documents
13076 *
13077 * parse an XML in-memory document and build a tree.
13078 * It use the given SAX function block to handle the parsing callback.
13079 * If sax is NULL, fallback to the default DOM tree building routines.
13080 *
13081 * Returns the resulting document tree
13082 */
13083
13084xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013085xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013086 xmlDocPtr ret;
13087 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013088 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013089
Daniel Veillard38936062004-11-04 17:45:11 +000013090 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013091
13092
13093 ctxt = xmlCreateDocParserCtxt(cur);
13094 if (ctxt == NULL) return(NULL);
13095 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013096 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013097 ctxt->sax = sax;
13098 ctxt->userData = NULL;
13099 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013100 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013101
13102 xmlParseDocument(ctxt);
13103 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13104 else {
13105 ret = NULL;
13106 xmlFreeDoc(ctxt->myDoc);
13107 ctxt->myDoc = NULL;
13108 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013109 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013110 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013111 xmlFreeParserCtxt(ctxt);
13112
13113 return(ret);
13114}
13115
13116/**
13117 * xmlParseDoc:
13118 * @cur: a pointer to an array of xmlChar
13119 *
13120 * parse an XML in-memory document and build a tree.
13121 *
13122 * Returns the resulting document tree
13123 */
13124
13125xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013126xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013127 return(xmlSAXParseDoc(NULL, cur, 0));
13128}
Daniel Veillard81273902003-09-30 00:43:48 +000013129#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013130
Daniel Veillard81273902003-09-30 00:43:48 +000013131#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013132/************************************************************************
13133 * *
13134 * Specific function to keep track of entities references *
13135 * and used by the XSLT debugger *
13136 * *
13137 ************************************************************************/
13138
/*
 * Callback invoked by xmlAddEntityReference(); installed through
 * xmlSetEntityReferenceFunc(). NULL (the initial value) means that no
 * callback is called.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13140
13141/**
13142 * xmlAddEntityReference:
13143 * @ent : A valid entity
13144 * @firstNode : A valid first node for children of entity
13145 * @lastNode : A valid last node of children entity
13146 *
13147 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13148 */
13149static void
13150xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13151 xmlNodePtr lastNode)
13152{
13153 if (xmlEntityRefFunc != NULL) {
13154 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13155 }
13156}
13157
13158
13159/**
13160 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013161 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013162 *
13163 * Set the function to call call back when a xml reference has been made
13164 */
13165void
13166xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13167{
13168 xmlEntityRefFunc = func;
13169}
Daniel Veillard81273902003-09-30 00:43:48 +000013170#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013171
13172/************************************************************************
13173 * *
13174 * Miscellaneous *
13175 * *
13176 ************************************************************************/
13177
13178#ifdef LIBXML_XPATH_ENABLED
13179#include <libxml/xpath.h>
13180#endif
13181
/*
 * Generic error callback defined elsewhere; xmlInitParser() compares
 * xmlGenericError against it.
 */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
13184
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Does nothing when the xmlParserInitialized flag is already set;
 * otherwise runs the library initializers below and sets the flag.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the flag is tested a second time once the
     * global init mutex is held, before any initializer runs.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        /* Only (re)install the default generic error handler when the
         * current one is the default or is unset. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitGlobals();
        xmlInitThreads();
        xmlInitMemory();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the library as initialized only after all steps ran. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
13227
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * Does nothing when the xmlParserInitialized flag is not set; the flag
 * is cleared again once the cleanup calls have run.
 */

void
xmlCleanupParser(void) {
    /* Nothing was initialized through xmlInitParser(): nothing to undo. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() call to initialize the library again. */
    xmlParserInitialized = 0;
}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013266
13267/************************************************************************
13268 * *
13269 * New set (2.6.0) of simpler and more flexible APIs *
13270 * *
13271 ************************************************************************/
13272
13273/**
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013274 * DICT_FREE:
13275 * @str: a string
13276 *
13277 * Free a string if it is not owned by the "dict" dictionnary in the
13278 * current scope
13279 */
13280#define DICT_FREE(str) \
13281 if ((str) && ((!dict) || \
13282 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
13283 xmlFree((char *)(str));
13284
13285/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013286 * xmlCtxtReset:
13287 * @ctxt: an XML parser context
13288 *
13289 * Reset a parser context
13290 */
13291void
13292xmlCtxtReset(xmlParserCtxtPtr ctxt)
13293{
13294 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013295 xmlDictPtr dict;
13296
13297 if (ctxt == NULL)
13298 return;
13299
13300 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013301
13302 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13303 xmlFreeInputStream(input);
13304 }
13305 ctxt->inputNr = 0;
13306 ctxt->input = NULL;
13307
13308 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013309 if (ctxt->spaceTab != NULL) {
13310 ctxt->spaceTab[0] = -1;
13311 ctxt->space = &ctxt->spaceTab[0];
13312 } else {
13313 ctxt->space = NULL;
13314 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013315
13316
13317 ctxt->nodeNr = 0;
13318 ctxt->node = NULL;
13319
13320 ctxt->nameNr = 0;
13321 ctxt->name = NULL;
13322
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013323 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013324 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013325 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013326 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013327 DICT_FREE(ctxt->directory);
13328 ctxt->directory = NULL;
13329 DICT_FREE(ctxt->extSubURI);
13330 ctxt->extSubURI = NULL;
13331 DICT_FREE(ctxt->extSubSystem);
13332 ctxt->extSubSystem = NULL;
13333 if (ctxt->myDoc != NULL)
13334 xmlFreeDoc(ctxt->myDoc);
13335 ctxt->myDoc = NULL;
13336
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013337 ctxt->standalone = -1;
13338 ctxt->hasExternalSubset = 0;
13339 ctxt->hasPErefs = 0;
13340 ctxt->html = 0;
13341 ctxt->external = 0;
13342 ctxt->instate = XML_PARSER_START;
13343 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013344
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013345 ctxt->wellFormed = 1;
13346 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013347 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013348 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013349#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013350 ctxt->vctxt.userData = ctxt;
13351 ctxt->vctxt.error = xmlParserValidityError;
13352 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013353#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013354 ctxt->record_info = 0;
13355 ctxt->nbChars = 0;
13356 ctxt->checkIndex = 0;
13357 ctxt->inSubset = 0;
13358 ctxt->errNo = XML_ERR_OK;
13359 ctxt->depth = 0;
13360 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13361 ctxt->catalogs = NULL;
13362 xmlInitNodeInfoSeq(&ctxt->node_seq);
13363
13364 if (ctxt->attsDefault != NULL) {
13365 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13366 ctxt->attsDefault = NULL;
13367 }
13368 if (ctxt->attsSpecial != NULL) {
13369 xmlHashFree(ctxt->attsSpecial, NULL);
13370 ctxt->attsSpecial = NULL;
13371 }
13372
Daniel Veillard4432df22003-09-28 18:58:27 +000013373#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013374 if (ctxt->catalogs != NULL)
13375 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013376#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013377 if (ctxt->lastError.code != XML_ERR_OK)
13378 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013379}
13380
13381/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013382 * xmlCtxtResetPush:
13383 * @ctxt: an XML parser context
13384 * @chunk: a pointer to an array of chars
13385 * @size: number of chars in the array
13386 * @filename: an optional file name or URI
13387 * @encoding: the document encoding, or NULL
13388 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013389 * Reset a push parser context
13390 *
13391 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013392 */
13393int
13394xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13395 int size, const char *filename, const char *encoding)
13396{
13397 xmlParserInputPtr inputStream;
13398 xmlParserInputBufferPtr buf;
13399 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13400
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013401 if (ctxt == NULL)
13402 return(1);
13403
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013404 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13405 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13406
13407 buf = xmlAllocParserInputBuffer(enc);
13408 if (buf == NULL)
13409 return(1);
13410
13411 if (ctxt == NULL) {
13412 xmlFreeParserInputBuffer(buf);
13413 return(1);
13414 }
13415
13416 xmlCtxtReset(ctxt);
13417
13418 if (ctxt->pushTab == NULL) {
13419 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13420 sizeof(xmlChar *));
13421 if (ctxt->pushTab == NULL) {
13422 xmlErrMemory(ctxt, NULL);
13423 xmlFreeParserInputBuffer(buf);
13424 return(1);
13425 }
13426 }
13427
13428 if (filename == NULL) {
13429 ctxt->directory = NULL;
13430 } else {
13431 ctxt->directory = xmlParserGetDirectory(filename);
13432 }
13433
13434 inputStream = xmlNewInputStream(ctxt);
13435 if (inputStream == NULL) {
13436 xmlFreeParserInputBuffer(buf);
13437 return(1);
13438 }
13439
13440 if (filename == NULL)
13441 inputStream->filename = NULL;
13442 else
13443 inputStream->filename = (char *)
13444 xmlCanonicPath((const xmlChar *) filename);
13445 inputStream->buf = buf;
13446 inputStream->base = inputStream->buf->buffer->content;
13447 inputStream->cur = inputStream->buf->buffer->content;
13448 inputStream->end =
13449 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13450
13451 inputPush(ctxt, inputStream);
13452
13453 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13454 (ctxt->input->buf != NULL)) {
13455 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13456 int cur = ctxt->input->cur - ctxt->input->base;
13457
13458 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13459
13460 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13461 ctxt->input->cur = ctxt->input->base + cur;
13462 ctxt->input->end =
13463 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13464 use];
13465#ifdef DEBUG_PUSH
13466 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13467#endif
13468 }
13469
13470 if (encoding != NULL) {
13471 xmlCharEncodingHandlerPtr hdlr;
13472
13473 hdlr = xmlFindCharEncodingHandler(encoding);
13474 if (hdlr != NULL) {
13475 xmlSwitchToEncoding(ctxt, hdlr);
13476 } else {
13477 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13478 "Unsupported encoding %s\n", BAD_CAST encoding);
13479 }
13480 } else if (enc != XML_CHAR_ENCODING_NONE) {
13481 xmlSwitchEncoding(ctxt, enc);
13482 }
13483
13484 return(0);
13485}
13486
13487/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013488 * xmlCtxtUseOptions:
13489 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013490 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013491 *
13492 * Applies the options to the parser context
13493 *
13494 * Returns 0 in case of success, the set of unknown or unimplemented options
13495 * in case of error.
13496 */
13497int
13498xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13499{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013500 if (ctxt == NULL)
13501 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013502 if (options & XML_PARSE_RECOVER) {
13503 ctxt->recovery = 1;
13504 options -= XML_PARSE_RECOVER;
13505 } else
13506 ctxt->recovery = 0;
13507 if (options & XML_PARSE_DTDLOAD) {
13508 ctxt->loadsubset = XML_DETECT_IDS;
13509 options -= XML_PARSE_DTDLOAD;
13510 } else
13511 ctxt->loadsubset = 0;
13512 if (options & XML_PARSE_DTDATTR) {
13513 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13514 options -= XML_PARSE_DTDATTR;
13515 }
13516 if (options & XML_PARSE_NOENT) {
13517 ctxt->replaceEntities = 1;
13518 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13519 options -= XML_PARSE_NOENT;
13520 } else
13521 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013522 if (options & XML_PARSE_PEDANTIC) {
13523 ctxt->pedantic = 1;
13524 options -= XML_PARSE_PEDANTIC;
13525 } else
13526 ctxt->pedantic = 0;
13527 if (options & XML_PARSE_NOBLANKS) {
13528 ctxt->keepBlanks = 0;
13529 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13530 options -= XML_PARSE_NOBLANKS;
13531 } else
13532 ctxt->keepBlanks = 1;
13533 if (options & XML_PARSE_DTDVALID) {
13534 ctxt->validate = 1;
13535 if (options & XML_PARSE_NOWARNING)
13536 ctxt->vctxt.warning = NULL;
13537 if (options & XML_PARSE_NOERROR)
13538 ctxt->vctxt.error = NULL;
13539 options -= XML_PARSE_DTDVALID;
13540 } else
13541 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013542 if (options & XML_PARSE_NOWARNING) {
13543 ctxt->sax->warning = NULL;
13544 options -= XML_PARSE_NOWARNING;
13545 }
13546 if (options & XML_PARSE_NOERROR) {
13547 ctxt->sax->error = NULL;
13548 ctxt->sax->fatalError = NULL;
13549 options -= XML_PARSE_NOERROR;
13550 }
Daniel Veillard81273902003-09-30 00:43:48 +000013551#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013552 if (options & XML_PARSE_SAX1) {
13553 ctxt->sax->startElement = xmlSAX2StartElement;
13554 ctxt->sax->endElement = xmlSAX2EndElement;
13555 ctxt->sax->startElementNs = NULL;
13556 ctxt->sax->endElementNs = NULL;
13557 ctxt->sax->initialized = 1;
13558 options -= XML_PARSE_SAX1;
13559 }
Daniel Veillard81273902003-09-30 00:43:48 +000013560#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013561 if (options & XML_PARSE_NODICT) {
13562 ctxt->dictNames = 0;
13563 options -= XML_PARSE_NODICT;
13564 } else {
13565 ctxt->dictNames = 1;
13566 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013567 if (options & XML_PARSE_NOCDATA) {
13568 ctxt->sax->cdataBlock = NULL;
13569 options -= XML_PARSE_NOCDATA;
13570 }
13571 if (options & XML_PARSE_NSCLEAN) {
13572 ctxt->options |= XML_PARSE_NSCLEAN;
13573 options -= XML_PARSE_NSCLEAN;
13574 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013575 if (options & XML_PARSE_NONET) {
13576 ctxt->options |= XML_PARSE_NONET;
13577 options -= XML_PARSE_NONET;
13578 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013579 if (options & XML_PARSE_COMPACT) {
13580 ctxt->options |= XML_PARSE_COMPACT;
13581 options -= XML_PARSE_COMPACT;
13582 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013583 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013584 return (options);
13585}
13586
13587/**
13588 * xmlDoRead:
13589 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013590 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013591 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013592 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013593 * @reuse: keep the context for reuse
13594 *
13595 * Common front-end for the xmlRead functions
13596 *
13597 * Returns the resulting document tree or NULL
13598 */
13599static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013600xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13601 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013602{
13603 xmlDocPtr ret;
13604
13605 xmlCtxtUseOptions(ctxt, options);
13606 if (encoding != NULL) {
13607 xmlCharEncodingHandlerPtr hdlr;
13608
13609 hdlr = xmlFindCharEncodingHandler(encoding);
13610 if (hdlr != NULL)
13611 xmlSwitchToEncoding(ctxt, hdlr);
13612 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013613 if ((URL != NULL) && (ctxt->input != NULL) &&
13614 (ctxt->input->filename == NULL))
13615 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013616 xmlParseDocument(ctxt);
13617 if ((ctxt->wellFormed) || ctxt->recovery)
13618 ret = ctxt->myDoc;
13619 else {
13620 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013621 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013622 xmlFreeDoc(ctxt->myDoc);
13623 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013624 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013625 ctxt->myDoc = NULL;
13626 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013627 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013628 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013629
13630 return (ret);
13631}
13632
13633/**
13634 * xmlReadDoc:
13635 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013636 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013637 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013638 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013639 *
13640 * parse an XML in-memory document and build a tree.
13641 *
13642 * Returns the resulting document tree
13643 */
13644xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013645xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013646{
13647 xmlParserCtxtPtr ctxt;
13648
13649 if (cur == NULL)
13650 return (NULL);
13651
13652 ctxt = xmlCreateDocParserCtxt(cur);
13653 if (ctxt == NULL)
13654 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013655 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013656}
13657
13658/**
13659 * xmlReadFile:
13660 * @filename: a file or URL
13661 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013662 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013663 *
13664 * parse an XML file from the filesystem or the network.
13665 *
13666 * Returns the resulting document tree
13667 */
13668xmlDocPtr
13669xmlReadFile(const char *filename, const char *encoding, int options)
13670{
13671 xmlParserCtxtPtr ctxt;
13672
Daniel Veillard61b93382003-11-03 14:28:31 +000013673 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013674 if (ctxt == NULL)
13675 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013676 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013677}
13678
13679/**
13680 * xmlReadMemory:
13681 * @buffer: a pointer to a char array
13682 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013683 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013684 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013685 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013686 *
13687 * parse an XML in-memory document and build a tree.
13688 *
13689 * Returns the resulting document tree
13690 */
13691xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013692xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013693{
13694 xmlParserCtxtPtr ctxt;
13695
13696 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13697 if (ctxt == NULL)
13698 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013699 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013700}
13701
13702/**
13703 * xmlReadFd:
13704 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013705 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013706 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013707 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013708 *
13709 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013710 * NOTE that the file descriptor will not be closed when the
13711 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013712 *
13713 * Returns the resulting document tree
13714 */
13715xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013716xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013717{
13718 xmlParserCtxtPtr ctxt;
13719 xmlParserInputBufferPtr input;
13720 xmlParserInputPtr stream;
13721
13722 if (fd < 0)
13723 return (NULL);
13724
13725 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13726 if (input == NULL)
13727 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013728 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013729 ctxt = xmlNewParserCtxt();
13730 if (ctxt == NULL) {
13731 xmlFreeParserInputBuffer(input);
13732 return (NULL);
13733 }
13734 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13735 if (stream == NULL) {
13736 xmlFreeParserInputBuffer(input);
13737 xmlFreeParserCtxt(ctxt);
13738 return (NULL);
13739 }
13740 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013741 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013742}
13743
13744/**
13745 * xmlReadIO:
13746 * @ioread: an I/O read function
13747 * @ioclose: an I/O close function
13748 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013749 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013750 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013751 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013752 *
13753 * parse an XML document from I/O functions and source and build a tree.
13754 *
13755 * Returns the resulting document tree
13756 */
13757xmlDocPtr
13758xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013759 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013760{
13761 xmlParserCtxtPtr ctxt;
13762 xmlParserInputBufferPtr input;
13763 xmlParserInputPtr stream;
13764
13765 if (ioread == NULL)
13766 return (NULL);
13767
13768 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13769 XML_CHAR_ENCODING_NONE);
13770 if (input == NULL)
13771 return (NULL);
13772 ctxt = xmlNewParserCtxt();
13773 if (ctxt == NULL) {
13774 xmlFreeParserInputBuffer(input);
13775 return (NULL);
13776 }
13777 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13778 if (stream == NULL) {
13779 xmlFreeParserInputBuffer(input);
13780 xmlFreeParserCtxt(ctxt);
13781 return (NULL);
13782 }
13783 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013784 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013785}
13786
13787/**
13788 * xmlCtxtReadDoc:
13789 * @ctxt: an XML parser context
13790 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013791 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013792 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013793 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013794 *
13795 * parse an XML in-memory document and build a tree.
13796 * This reuses the existing @ctxt parser context
13797 *
13798 * Returns the resulting document tree
13799 */
13800xmlDocPtr
13801xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013802 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013803{
13804 xmlParserInputPtr stream;
13805
13806 if (cur == NULL)
13807 return (NULL);
13808 if (ctxt == NULL)
13809 return (NULL);
13810
13811 xmlCtxtReset(ctxt);
13812
13813 stream = xmlNewStringInputStream(ctxt, cur);
13814 if (stream == NULL) {
13815 return (NULL);
13816 }
13817 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013818 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013819}
13820
13821/**
13822 * xmlCtxtReadFile:
13823 * @ctxt: an XML parser context
13824 * @filename: a file or URL
13825 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013826 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013827 *
13828 * parse an XML file from the filesystem or the network.
13829 * This reuses the existing @ctxt parser context
13830 *
13831 * Returns the resulting document tree
13832 */
13833xmlDocPtr
13834xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13835 const char *encoding, int options)
13836{
13837 xmlParserInputPtr stream;
13838
13839 if (filename == NULL)
13840 return (NULL);
13841 if (ctxt == NULL)
13842 return (NULL);
13843
13844 xmlCtxtReset(ctxt);
13845
Daniel Veillard29614c72004-11-26 10:47:26 +000013846 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013847 if (stream == NULL) {
13848 return (NULL);
13849 }
13850 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013851 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013852}
13853
13854/**
13855 * xmlCtxtReadMemory:
13856 * @ctxt: an XML parser context
13857 * @buffer: a pointer to a char array
13858 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013859 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013860 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013861 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013862 *
13863 * parse an XML in-memory document and build a tree.
13864 * This reuses the existing @ctxt parser context
13865 *
13866 * Returns the resulting document tree
13867 */
13868xmlDocPtr
13869xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013870 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013871{
13872 xmlParserInputBufferPtr input;
13873 xmlParserInputPtr stream;
13874
13875 if (ctxt == NULL)
13876 return (NULL);
13877 if (buffer == NULL)
13878 return (NULL);
13879
13880 xmlCtxtReset(ctxt);
13881
13882 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13883 if (input == NULL) {
13884 return(NULL);
13885 }
13886
13887 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13888 if (stream == NULL) {
13889 xmlFreeParserInputBuffer(input);
13890 return(NULL);
13891 }
13892
13893 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013894 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013895}
13896
13897/**
13898 * xmlCtxtReadFd:
13899 * @ctxt: an XML parser context
13900 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013901 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013902 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013903 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013904 *
13905 * parse an XML from a file descriptor and build a tree.
13906 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013907 * NOTE that the file descriptor will not be closed when the
13908 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013909 *
13910 * Returns the resulting document tree
13911 */
13912xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013913xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13914 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013915{
13916 xmlParserInputBufferPtr input;
13917 xmlParserInputPtr stream;
13918
13919 if (fd < 0)
13920 return (NULL);
13921 if (ctxt == NULL)
13922 return (NULL);
13923
13924 xmlCtxtReset(ctxt);
13925
13926
13927 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13928 if (input == NULL)
13929 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013930 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13932 if (stream == NULL) {
13933 xmlFreeParserInputBuffer(input);
13934 return (NULL);
13935 }
13936 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013937 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013938}
13939
13940/**
13941 * xmlCtxtReadIO:
13942 * @ctxt: an XML parser context
13943 * @ioread: an I/O read function
13944 * @ioclose: an I/O close function
13945 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013946 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013947 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013948 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013949 *
13950 * parse an XML document from I/O functions and source and build a tree.
13951 * This reuses the existing @ctxt parser context
13952 *
13953 * Returns the resulting document tree
13954 */
13955xmlDocPtr
13956xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13957 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013958 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013959 const char *encoding, int options)
13960{
13961 xmlParserInputBufferPtr input;
13962 xmlParserInputPtr stream;
13963
13964 if (ioread == NULL)
13965 return (NULL);
13966 if (ctxt == NULL)
13967 return (NULL);
13968
13969 xmlCtxtReset(ctxt);
13970
13971 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13972 XML_CHAR_ENCODING_NONE);
13973 if (input == NULL)
13974 return (NULL);
13975 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13976 if (stream == NULL) {
13977 xmlFreeParserInputBuffer(input);
13978 return (NULL);
13979 }
13980 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013981 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013982}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013983
13984#define bottom_parser
13985#include "elfgcchack.h"