blob: e0f30e767c39258eb5d5d97b22a3be4bbd25da59 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents that we agree
 * to process. It is a safety boundary feature, not a limitation of the
 * parser itself.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* NOTE(review): presumably selects the SAX2 code paths; confirm at use sites. */
#define SAX2 1

/*
 * Buffer sizes used by the parser.
 * NOTE(review): presumably initial sizes of growable buffers; confirm
 * against the routines that use them.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for documents handled in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * pointers stored in the table are constant: it is only ever read.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
619/**
620 * xmlHasFeature:
621 * @feature: the feature to be examined
622 *
623 * Examines if the library has been compiled with a given feature.
624 *
 * Returns a non-zero value if the feature exists; zero (0) if the feature
 * does not exist or an unknown feature is requested.
628 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
Daniel Veillard6a31b832008-03-26 14:06:44 +0000981 /*
982 * Allows to detect attribute redefinitions
983 */
984 if (ctxt->attsSpecial != NULL) {
985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
986 return;
987 }
988
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991 if (ctxt->attsDefault == NULL)
992 goto mem_error;
993 }
994
995 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000996 * split the element name into prefix:localname , the string found
997 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000998 */
999 name = xmlSplitQName3(fullname, &len);
1000 if (name == NULL) {
1001 name = xmlDictLookup(ctxt->dict, fullname, -1);
1002 prefix = NULL;
1003 } else {
1004 name = xmlDictLookup(ctxt->dict, name, -1);
1005 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1006 }
1007
1008 /*
1009 * make sure there is some storage
1010 */
1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012 if (defaults == NULL) {
1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001014 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015 if (defaults == NULL)
1016 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001018 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001019 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1020 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001021 xmlDefAttrsPtr temp;
1022
1023 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001025 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001026 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001027 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 defaults->maxAttrs *= 2;
1029 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1030 }
1031
1032 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001033 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 * are within the DTD and hen not associated to namespace names.
1035 */
1036 name = xmlSplitQName3(fullattr, &len);
1037 if (name == NULL) {
1038 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1039 prefix = NULL;
1040 } else {
1041 name = xmlDictLookup(ctxt->dict, name, -1);
1042 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1043 }
1044
1045 defaults->values[4 * defaults->nbAttrs] = name;
1046 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1047 /* intern the string and precompute the end */
1048 len = xmlStrlen(value);
1049 value = xmlDictLookup(ctxt->dict, value, len);
1050 defaults->values[4 * defaults->nbAttrs + 2] = value;
1051 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1052 defaults->nbAttrs++;
1053
1054 return;
1055
1056mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001057 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001058 return;
1059}
1060
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001061/**
1062 * xmlAddSpecialAttr:
1063 * @ctxt: an XML parser context
1064 * @fullname: the element fullname
1065 * @fullattr: the attribute fullname
1066 * @type: the attribute type
1067 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001068 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001069 */
1070static void
1071xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1072 const xmlChar *fullname,
1073 const xmlChar *fullattr,
1074 int type)
1075{
1076 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001077 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001078 if (ctxt->attsSpecial == NULL)
1079 goto mem_error;
1080 }
1081
Daniel Veillardac4118d2008-01-11 05:27:32 +00001082 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1083 return;
1084
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001085 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1086 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001087 return;
1088
1089mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001090 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001091 return;
1092}
1093
Daniel Veillard4432df22003-09-28 18:58:27 +00001094/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001095 * xmlCleanSpecialAttrCallback:
1096 *
1097 * Removes CDATA attributes from the special attribute table
1098 */
1099static void
1100xmlCleanSpecialAttrCallback(void *payload, void *data,
1101 const xmlChar *fullname, const xmlChar *fullattr,
1102 const xmlChar *unused ATTRIBUTE_UNUSED) {
1103 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1104
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001105 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001106 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1107 }
1108}
1109
1110/**
1111 * xmlCleanSpecialAttr:
1112 * @ctxt: an XML parser context
1113 *
1114 * Trim the list of attributes defined to remove all those of type
1115 * CDATA as they are not special. This call should be done when finishing
1116 * to parse the DTD and before starting to parse the document root.
1117 */
1118static void
1119xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1120{
1121 if (ctxt->attsSpecial == NULL)
1122 return;
1123
1124 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1125
1126 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1127 xmlHashFree(ctxt->attsSpecial, NULL);
1128 ctxt->attsSpecial = NULL;
1129 }
1130 return;
1131}
1132
1133/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001134 * xmlCheckLanguageID:
1135 * @lang: pointer to the string value
1136 *
1137 * Checks that the value conforms to the LanguageID production:
1138 *
1139 * NOTE: this is somewhat deprecated, those productions were removed from
1140 * the XML Second edition.
1141 *
1142 * [33] LanguageID ::= Langcode ('-' Subcode)*
1143 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1144 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1145 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1146 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1147 * [38] Subcode ::= ([a-z] | [A-Z])+
1148 *
1149 * Returns 1 if correct 0 otherwise
1150 **/
1151int
1152xmlCheckLanguageID(const xmlChar * lang)
1153{
1154 const xmlChar *cur = lang;
1155
1156 if (cur == NULL)
1157 return (0);
1158 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1159 ((cur[0] == 'I') && (cur[1] == '-'))) {
1160 /*
1161 * IANA code
1162 */
1163 cur += 2;
1164 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1165 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1166 cur++;
1167 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1168 ((cur[0] == 'X') && (cur[1] == '-'))) {
1169 /*
1170 * User code
1171 */
1172 cur += 2;
1173 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1174 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1175 cur++;
1176 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1177 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1178 /*
1179 * ISO639
1180 */
1181 cur++;
1182 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1183 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1184 cur++;
1185 else
1186 return (0);
1187 } else
1188 return (0);
1189 while (cur[0] != 0) { /* non input consuming */
1190 if (cur[0] != '-')
1191 return (0);
1192 cur++;
1193 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1194 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1195 cur++;
1196 else
1197 return (0);
1198 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1199 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1200 cur++;
1201 }
1202 return (1);
1203}
1204
/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/
1210
1211xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1212 const xmlChar ** str);
1213
Daniel Veillard0fb18932003-09-07 09:14:37 +00001214#ifdef SAX2
1215/**
1216 * nsPush:
1217 * @ctxt: an XML parser context
1218 * @prefix: the namespace prefix or NULL
1219 * @URL: the namespace name
1220 *
1221 * Pushes a new parser namespace on top of the ns stack
1222 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001223 * Returns -1 in case of error, -2 if the namespace should be discarded
1224 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001225 */
1226static int
1227nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1228{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001229 if (ctxt->options & XML_PARSE_NSCLEAN) {
1230 int i;
1231 for (i = 0;i < ctxt->nsNr;i += 2) {
1232 if (ctxt->nsTab[i] == prefix) {
1233 /* in scope */
1234 if (ctxt->nsTab[i + 1] == URL)
1235 return(-2);
1236 /* out of scope keep it */
1237 break;
1238 }
1239 }
1240 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001241 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1242 ctxt->nsMax = 10;
1243 ctxt->nsNr = 0;
1244 ctxt->nsTab = (const xmlChar **)
1245 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1246 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001247 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001248 ctxt->nsMax = 0;
1249 return (-1);
1250 }
1251 } else if (ctxt->nsNr >= ctxt->nsMax) {
1252 ctxt->nsMax *= 2;
1253 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001254 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001255 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1256 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001257 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001258 ctxt->nsMax /= 2;
1259 return (-1);
1260 }
1261 }
1262 ctxt->nsTab[ctxt->nsNr++] = prefix;
1263 ctxt->nsTab[ctxt->nsNr++] = URL;
1264 return (ctxt->nsNr);
1265}
1266/**
1267 * nsPop:
1268 * @ctxt: an XML parser context
1269 * @nr: the number to pop
1270 *
1271 * Pops the top @nr parser prefix/namespace from the ns stack
1272 *
1273 * Returns the number of namespaces removed
1274 */
1275static int
1276nsPop(xmlParserCtxtPtr ctxt, int nr)
1277{
1278 int i;
1279
1280 if (ctxt->nsTab == NULL) return(0);
1281 if (ctxt->nsNr < nr) {
1282 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1283 nr = ctxt->nsNr;
1284 }
1285 if (ctxt->nsNr <= 0)
1286 return (0);
1287
1288 for (i = 0;i < nr;i++) {
1289 ctxt->nsNr--;
1290 ctxt->nsTab[ctxt->nsNr] = NULL;
1291 }
1292 return(nr);
1293}
1294#endif
1295
1296static int
1297xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1298 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001300 int maxatts;
1301
1302 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001303 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001304 atts = (const xmlChar **)
1305 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001306 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001307 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1309 if (attallocs == NULL) goto mem_error;
1310 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001311 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001312 } else if (nr + 5 > ctxt->maxatts) {
1313 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001314 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1315 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001316 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001317 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001318 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1319 (maxatts / 5) * sizeof(int));
1320 if (attallocs == NULL) goto mem_error;
1321 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001322 ctxt->maxatts = maxatts;
1323 }
1324 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001325mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001326 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001327 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001328}
1329
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001330/**
1331 * inputPush:
1332 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001333 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001334 *
1335 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001336 *
1337 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001338 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001339int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1341{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001342 if ((ctxt == NULL) || (value == NULL))
1343 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001344 if (ctxt->inputNr >= ctxt->inputMax) {
1345 ctxt->inputMax *= 2;
1346 ctxt->inputTab =
1347 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1348 ctxt->inputMax *
1349 sizeof(ctxt->inputTab[0]));
1350 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001351 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001352 return (0);
1353 }
1354 }
1355 ctxt->inputTab[ctxt->inputNr] = value;
1356 ctxt->input = value;
1357 return (ctxt->inputNr++);
1358}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001359/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001360 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001361 * @ctxt: an XML parser context
1362 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001363 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001364 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001365 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001366 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001367xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001368inputPop(xmlParserCtxtPtr ctxt)
1369{
1370 xmlParserInputPtr ret;
1371
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001372 if (ctxt == NULL)
1373 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001374 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001375 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001376 ctxt->inputNr--;
1377 if (ctxt->inputNr > 0)
1378 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1379 else
1380 ctxt->input = NULL;
1381 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001382 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 return (ret);
1384}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001385/**
1386 * nodePush:
1387 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001388 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001389 *
1390 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001391 *
1392 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001393 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001394int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001395nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1396{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001397 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001398 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001399 xmlNodePtr *tmp;
1400
1401 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1402 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001404 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001405 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001406 return (0);
1407 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001408 ctxt->nodeTab = tmp;
1409 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001410 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001411 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001412 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001413 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1414 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001415 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001416 return(0);
1417 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001418 ctxt->nodeTab[ctxt->nodeNr] = value;
1419 ctxt->node = value;
1420 return (ctxt->nodeNr++);
1421}
1422/**
1423 * nodePop:
1424 * @ctxt: an XML parser context
1425 *
1426 * Pops the top element node from the node stack
1427 *
1428 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001429 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001430xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001431nodePop(xmlParserCtxtPtr ctxt)
1432{
1433 xmlNodePtr ret;
1434
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001435 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001436 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001437 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001438 ctxt->nodeNr--;
1439 if (ctxt->nodeNr > 0)
1440 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1441 else
1442 ctxt->node = NULL;
1443 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001444 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445 return (ret);
1446}
Daniel Veillarda2351322004-06-27 12:08:10 +00001447
1448#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001449/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001450 * nameNsPush:
1451 * @ctxt: an XML parser context
1452 * @value: the element name
1453 * @prefix: the element prefix
1454 * @URI: the element namespace name
1455 *
1456 * Pushes a new element name/prefix/URL on top of the name stack
1457 *
1458 * Returns -1 in case of error, the index in the stack otherwise
1459 */
1460static int
1461nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1462 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1463{
1464 if (ctxt->nameNr >= ctxt->nameMax) {
1465 const xmlChar * *tmp;
1466 void **tmp2;
1467 ctxt->nameMax *= 2;
1468 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1469 ctxt->nameMax *
1470 sizeof(ctxt->nameTab[0]));
1471 if (tmp == NULL) {
1472 ctxt->nameMax /= 2;
1473 goto mem_error;
1474 }
1475 ctxt->nameTab = tmp;
1476 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1477 ctxt->nameMax * 3 *
1478 sizeof(ctxt->pushTab[0]));
1479 if (tmp2 == NULL) {
1480 ctxt->nameMax /= 2;
1481 goto mem_error;
1482 }
1483 ctxt->pushTab = tmp2;
1484 }
1485 ctxt->nameTab[ctxt->nameNr] = value;
1486 ctxt->name = value;
1487 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1488 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001489 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001490 return (ctxt->nameNr++);
1491mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001492 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001493 return (-1);
1494}
1495/**
1496 * nameNsPop:
1497 * @ctxt: an XML parser context
1498 *
1499 * Pops the top element/prefix/URI name from the name stack
1500 *
1501 * Returns the name just removed
1502 */
1503static const xmlChar *
1504nameNsPop(xmlParserCtxtPtr ctxt)
1505{
1506 const xmlChar *ret;
1507
1508 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001509 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001510 ctxt->nameNr--;
1511 if (ctxt->nameNr > 0)
1512 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1513 else
1514 ctxt->name = NULL;
1515 ret = ctxt->nameTab[ctxt->nameNr];
1516 ctxt->nameTab[ctxt->nameNr] = NULL;
1517 return (ret);
1518}
Daniel Veillarda2351322004-06-27 12:08:10 +00001519#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001520
1521/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001522 * namePush:
1523 * @ctxt: an XML parser context
1524 * @value: the element name
1525 *
1526 * Pushes a new element name on top of the name stack
1527 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001528 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001529 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001530int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001531namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001532{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001533 if (ctxt == NULL) return (-1);
1534
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001536 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001537 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001538 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001539 ctxt->nameMax *
1540 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001541 if (tmp == NULL) {
1542 ctxt->nameMax /= 2;
1543 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001544 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001545 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001546 }
1547 ctxt->nameTab[ctxt->nameNr] = value;
1548 ctxt->name = value;
1549 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001550mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001551 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001552 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553}
1554/**
1555 * namePop:
1556 * @ctxt: an XML parser context
1557 *
1558 * Pops the top element name from the name stack
1559 *
1560 * Returns the name just removed
1561 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001562const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001563namePop(xmlParserCtxtPtr ctxt)
1564{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001565 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001566
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001567 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1568 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001569 ctxt->nameNr--;
1570 if (ctxt->nameNr > 0)
1571 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1572 else
1573 ctxt->name = NULL;
1574 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001575 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 return (ret);
1577}
Owen Taylor3473f882001-02-23 17:55:21 +00001578
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001579static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001580 if (ctxt->spaceNr >= ctxt->spaceMax) {
1581 ctxt->spaceMax *= 2;
1582 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1583 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1584 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001586 return(0);
1587 }
1588 }
1589 ctxt->spaceTab[ctxt->spaceNr] = val;
1590 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1591 return(ctxt->spaceNr++);
1592}
1593
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001594static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 int ret;
1596 if (ctxt->spaceNr <= 0) return(0);
1597 ctxt->spaceNr--;
1598 if (ctxt->spaceNr > 0)
1599 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1600 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001601 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001602 ret = ctxt->spaceTab[ctxt->spaceNr];
1603 ctxt->spaceTab[ctxt->spaceNr] = -1;
1604 return(ret);
1605}
1606
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1641
/*
 * Raw accessors on the current input. They all expand to expressions on a
 * variable named ctxt that must be in scope at the point of use.
 */
#define RAW		(*ctxt->input->cur)	/* byte at the read position */
#define CUR		(*ctxt->input->cur)	/* same expansion as RAW */
#define NXT(val)	ctxt->input->cur[(val)]	/* byte val positions ahead */
#define CUR_PTR		ctxt->input->cur	/* the read position itself */
1646
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Bytes are compared as unsigned char and the comparison stops at
 * the first mismatch, so no byte past a mismatch is read.
 *
 * Every macro parameter is parenthesized so that an argument which is
 * itself an expression keeps its meaning inside the expansion. s may be
 * evaluated several times; do not pass an expression with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1664
/*
 * SKIP(val): advance the read position by val bytes, adding val to both
 * the column and ctxt->nbChars. The line counter is not touched, so the
 * skipped bytes must not contain a newline. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and an input that
 * is exhausted and cannot be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1672
/*
 * SKIPL(val): advance the read position by val bytes one at a time,
 * keeping line and column up to date: a '\n' bumps the line and resets
 * the column to 1, any other byte bumps the column. ctxt->nbChars grows
 * by one per byte. Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and an input that is exhausted and cannot
 * be grown is popped.
 *
 * val is parenthesized in the loop test: unparenthesized, an argument
 * such as `a ? 2 : 3` was parsed as `(skipl<a) ? 2 : 3`, which is always
 * true and never terminates. val is still evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1687
/*
 * SHRINK: when not in progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the read position and fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it.
 *
 * The expansion is a bare `if` statement that already ends with ';'.
 * Do not use it as the unbraced body of an if/else.
 */
#define SHRINK								\
    if ((ctxt->progressive == 0) &&					\
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) &&	\
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK))	\
        xmlSHRINK (ctxt);
1692
1693static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1694 xmlParserInputShrink(ctxt->input);
1695 if ((*ctxt->input->cur == 0) &&
1696 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1697 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001698 }
Owen Taylor3473f882001-02-23 17:55:21 +00001699
/*
 * GROW: when not in progressive mode, call xmlGROW() once fewer than
 * INPUT_CHUNK bytes remain ahead of the read position.
 *
 * The expansion is a bare `if` statement that already ends with ';'.
 * Do not use it as the unbraced body of an if/else.
 */
#define GROW								\
    if ((ctxt->progressive == 0) &&					\
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))		\
        xmlGROW (ctxt);
1703
1704static void xmlGROW (xmlParserCtxtPtr ctxt) {
1705 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1706 if ((*ctxt->input->cur == 0) &&
1707 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1708 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001709}
Owen Taylor3473f882001-02-23 17:55:21 +00001710
/* Shorthands that operate on the ctxt variable in scope at the call site. */
#define SKIP_BLANKS	xmlSkipBlankChars(ctxt)

#define NEXT		xmlNextChar(ctxt)
1714
/*
 * NEXT1: step over exactly one byte, bumping the column and
 * ctxt->nbChars; the line counter is not touched. If the new position
 * sits on a 0 byte, more input is requested.
 *
 * The expansion is a braced block, not a do/while(0) statement.
 */
#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->input->col++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1722
/*
 * NEXTL(l): step over the current character, which occupies l bytes.
 * A '\n' bumps the line and resets the column to 1; anything else bumps
 * the column by one. A '%' at the new position is handed to
 * xmlParserHandlePEReference(). ctxt->nbChars is not updated here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else								\
        ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)
1730
/* Decode the character at the read position; l receives its byte length. */
#define CUR_CHAR(l)		xmlCurrentChar(ctxt, &l)
/* Same, decoding from the string s instead of the parser input. */
#define CUR_SCHAR(s, l)		xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of byte length 1 is stored directly, anything
 * else goes through xmlCopyCharMultiByte().
 *
 * The expansion is a bare if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001737
1738/**
1739 * xmlSkipBlankChars:
1740 * @ctxt: the XML parser context
1741 *
1742 * skip all blanks character found at that point in the input streams.
1743 * It pops up finished entities in the process if allowable at that point.
1744 *
1745 * Returns the number of space chars skipped
1746 */
1747
1748int
1749xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001750 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001751
1752 /*
1753 * It's Okay to use CUR/NEXT here since all the blanks are on
1754 * the ASCII range.
1755 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001756 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1757 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001758 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001759 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001760 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001761 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001762 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001763 if (*cur == '\n') {
1764 ctxt->input->line++; ctxt->input->col = 1;
1765 }
1766 cur++;
1767 res++;
1768 if (*cur == 0) {
1769 ctxt->input->cur = cur;
1770 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1771 cur = ctxt->input->cur;
1772 }
1773 }
1774 ctxt->input->cur = cur;
1775 } else {
1776 int cur;
1777 do {
1778 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001779 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001780 NEXT;
1781 cur = CUR;
1782 res++;
1783 }
1784 while ((cur == 0) && (ctxt->inputNr > 1) &&
1785 (ctxt->instate != XML_PARSER_COMMENT)) {
1786 xmlPopInput(ctxt);
1787 cur = CUR;
1788 }
1789 /*
1790 * Need to handle support of entities branching here
1791 */
1792 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1793 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1794 }
Owen Taylor3473f882001-02-23 17:55:21 +00001795 return(res);
1796}
1797
1798/************************************************************************
1799 * *
1800 * Commodity functions to handle entities *
1801 * *
1802 ************************************************************************/
1803
1804/**
1805 * xmlPopInput:
1806 * @ctxt: an XML parser context
1807 *
1808 * xmlPopInput: the current input pointed by ctxt->input came to an end
1809 * pop it and return the next char.
1810 *
1811 * Returns the current xmlChar in the parser context
1812 */
1813xmlChar
1814xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001815 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001816 if (xmlParserDebugEntities)
1817 xmlGenericError(xmlGenericErrorContext,
1818 "Popping input %d\n", ctxt->inputNr);
1819 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001820 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001821 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1822 return(xmlPopInput(ctxt));
1823 return(CUR);
1824}
1825
1826/**
1827 * xmlPushInput:
1828 * @ctxt: an XML parser context
1829 * @input: an XML parser input fragment (entity, XML fragment ...).
1830 *
1831 * xmlPushInput: switch to a new input stream which is stacked on top
1832 * of the previous one(s).
1833 */
1834void
1835xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1836 if (input == NULL) return;
1837
1838 if (xmlParserDebugEntities) {
1839 if ((ctxt->input != NULL) && (ctxt->input->filename))
1840 xmlGenericError(xmlGenericErrorContext,
1841 "%s(%d): ", ctxt->input->filename,
1842 ctxt->input->line);
1843 xmlGenericError(xmlGenericErrorContext,
1844 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1845 }
1846 inputPush(ctxt, input);
1847 GROW;
1848}
1849
1850/**
1851 * xmlParseCharRef:
1852 * @ctxt: an XML parser context
1853 *
1854 * parse Reference declarations
1855 *
1856 * [66] CharRef ::= '&#' [0-9]+ ';' |
1857 * '&#x' [0-9a-fA-F]+ ';'
1858 *
1859 * [ WFC: Legal Character ]
1860 * Characters referred to using character references must match the
1861 * production for Char.
1862 *
1863 * Returns the value parsed (as an int), 0 in case of error
1864 */
1865int
1866xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001867 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001868 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001869 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001870
Owen Taylor3473f882001-02-23 17:55:21 +00001871 /*
1872 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1873 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001874 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001875 (NXT(2) == 'x')) {
1876 SKIP(3);
1877 GROW;
1878 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001879 if (count++ > 20) {
1880 count = 0;
1881 GROW;
1882 }
1883 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001884 val = val * 16 + (CUR - '0');
1885 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1886 val = val * 16 + (CUR - 'a') + 10;
1887 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1888 val = val * 16 + (CUR - 'A') + 10;
1889 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001890 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001891 val = 0;
1892 break;
1893 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001894 if (val > 0x10FFFF)
1895 outofrange = val;
1896
Owen Taylor3473f882001-02-23 17:55:21 +00001897 NEXT;
1898 count++;
1899 }
1900 if (RAW == ';') {
1901 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001902 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001903 ctxt->nbChars ++;
1904 ctxt->input->cur++;
1905 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001906 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001907 SKIP(2);
1908 GROW;
1909 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001910 if (count++ > 20) {
1911 count = 0;
1912 GROW;
1913 }
1914 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001915 val = val * 10 + (CUR - '0');
1916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001917 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001918 val = 0;
1919 break;
1920 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001921 if (val > 0x10FFFF)
1922 outofrange = val;
1923
Owen Taylor3473f882001-02-23 17:55:21 +00001924 NEXT;
1925 count++;
1926 }
1927 if (RAW == ';') {
1928 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001929 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001930 ctxt->nbChars ++;
1931 ctxt->input->cur++;
1932 }
1933 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001934 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001935 }
1936
1937 /*
1938 * [ WFC: Legal Character ]
1939 * Characters referred to using character references must match the
1940 * production for Char.
1941 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001942 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001943 return(val);
1944 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001945 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1946 "xmlParseCharRef: invalid xmlChar value %d\n",
1947 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001948 }
1949 return(0);
1950}
1951
1952/**
1953 * xmlParseStringCharRef:
1954 * @ctxt: an XML parser context
1955 * @str: a pointer to an index in the string
1956 *
1957 * parse Reference declarations, variant parsing from a string rather
1958 * than an an input flow.
1959 *
1960 * [66] CharRef ::= '&#' [0-9]+ ';' |
1961 * '&#x' [0-9a-fA-F]+ ';'
1962 *
1963 * [ WFC: Legal Character ]
1964 * Characters referred to using character references must match the
1965 * production for Char.
1966 *
1967 * Returns the value parsed (as an int), 0 in case of error, str will be
1968 * updated to the current value of the index
1969 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001970static int
Owen Taylor3473f882001-02-23 17:55:21 +00001971xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1972 const xmlChar *ptr;
1973 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001974 unsigned int val = 0;
1975 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001976
1977 if ((str == NULL) || (*str == NULL)) return(0);
1978 ptr = *str;
1979 cur = *ptr;
1980 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1981 ptr += 3;
1982 cur = *ptr;
1983 while (cur != ';') { /* Non input consuming loop */
1984 if ((cur >= '0') && (cur <= '9'))
1985 val = val * 16 + (cur - '0');
1986 else if ((cur >= 'a') && (cur <= 'f'))
1987 val = val * 16 + (cur - 'a') + 10;
1988 else if ((cur >= 'A') && (cur <= 'F'))
1989 val = val * 16 + (cur - 'A') + 10;
1990 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001991 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001992 val = 0;
1993 break;
1994 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001995 if (val > 0x10FFFF)
1996 outofrange = val;
1997
Owen Taylor3473f882001-02-23 17:55:21 +00001998 ptr++;
1999 cur = *ptr;
2000 }
2001 if (cur == ';')
2002 ptr++;
2003 } else if ((cur == '&') && (ptr[1] == '#')){
2004 ptr += 2;
2005 cur = *ptr;
2006 while (cur != ';') { /* Non input consuming loops */
2007 if ((cur >= '0') && (cur <= '9'))
2008 val = val * 10 + (cur - '0');
2009 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 val = 0;
2012 break;
2013 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002014 if (val > 0x10FFFF)
2015 outofrange = val;
2016
Owen Taylor3473f882001-02-23 17:55:21 +00002017 ptr++;
2018 cur = *ptr;
2019 }
2020 if (cur == ';')
2021 ptr++;
2022 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002023 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002024 return(0);
2025 }
2026 *str = ptr;
2027
2028 /*
2029 * [ WFC: Legal Character ]
2030 * Characters referred to using character references must match the
2031 * production for Char.
2032 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002033 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002034 return(val);
2035 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002036 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2037 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2038 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002039 }
2040 return(0);
2041}
2042
2043/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002044 * xmlNewBlanksWrapperInputStream:
2045 * @ctxt: an XML parser context
2046 * @entity: an Entity pointer
2047 *
2048 * Create a new input stream for wrapping
2049 * blanks around a PEReference
2050 *
2051 * Returns the new input stream or NULL
2052 */
2053
2054static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2055
Daniel Veillardf4862f02002-09-10 11:13:43 +00002056static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002057xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2058 xmlParserInputPtr input;
2059 xmlChar *buffer;
2060 size_t length;
2061 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2063 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002064 return(NULL);
2065 }
2066 if (xmlParserDebugEntities)
2067 xmlGenericError(xmlGenericErrorContext,
2068 "new blanks wrapper for entity: %s\n", entity->name);
2069 input = xmlNewInputStream(ctxt);
2070 if (input == NULL) {
2071 return(NULL);
2072 }
2073 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002074 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002075 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002076 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002077 return(NULL);
2078 }
2079 buffer [0] = ' ';
2080 buffer [1] = '%';
2081 buffer [length-3] = ';';
2082 buffer [length-2] = ' ';
2083 buffer [length-1] = 0;
2084 memcpy(buffer + 2, entity->name, length - 5);
2085 input->free = deallocblankswrapper;
2086 input->base = buffer;
2087 input->cur = buffer;
2088 input->length = length;
2089 input->end = &buffer[length];
2090 return(input);
2091}
2092
2093/**
Owen Taylor3473f882001-02-23 17:55:21 +00002094 * xmlParserHandlePEReference:
2095 * @ctxt: the parser context
2096 *
2097 * [69] PEReference ::= '%' Name ';'
2098 *
2099 * [ WFC: No Recursion ]
2100 * A parsed entity must not contain a recursive
2101 * reference to itself, either directly or indirectly.
2102 *
2103 * [ WFC: Entity Declared ]
2104 * In a document without any DTD, a document with only an internal DTD
2105 * subset which contains no parameter entity references, or a document
2106 * with "standalone='yes'", ... ... The declaration of a parameter
2107 * entity must precede any reference to it...
2108 *
2109 * [ VC: Entity Declared ]
2110 * In a document with an external subset or external parameter entities
2111 * with "standalone='no'", ... ... The declaration of a parameter entity
2112 * must precede any reference to it...
2113 *
2114 * [ WFC: In DTD ]
2115 * Parameter-entity references may only appear in the DTD.
2116 * NOTE: misleading but this is handled.
2117 *
2118 * A PEReference may have been detected in the current input stream
2119 * the handling is done accordingly to
2120 * http://www.w3.org/TR/REC-xml#entproc
2121 * i.e.
2122 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002123 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002124 */
2125void
2126xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002127 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002128 xmlEntityPtr entity = NULL;
2129 xmlParserInputPtr input;
2130
Owen Taylor3473f882001-02-23 17:55:21 +00002131 if (RAW != '%') return;
2132 switch(ctxt->instate) {
2133 case XML_PARSER_CDATA_SECTION:
2134 return;
2135 case XML_PARSER_COMMENT:
2136 return;
2137 case XML_PARSER_START_TAG:
2138 return;
2139 case XML_PARSER_END_TAG:
2140 return;
2141 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002142 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002143 return;
2144 case XML_PARSER_PROLOG:
2145 case XML_PARSER_START:
2146 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002147 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return;
2149 case XML_PARSER_ENTITY_DECL:
2150 case XML_PARSER_CONTENT:
2151 case XML_PARSER_ATTRIBUTE_VALUE:
2152 case XML_PARSER_PI:
2153 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002154 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002155 /* we just ignore it there */
2156 return;
2157 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002158 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002159 return;
2160 case XML_PARSER_ENTITY_VALUE:
2161 /*
2162 * NOTE: in the case of entity values, we don't do the
2163 * substitution here since we need the literal
2164 * entity value to be able to save the internal
2165 * subset of the document.
2166 * This will be handled by xmlStringDecodeEntities
2167 */
2168 return;
2169 case XML_PARSER_DTD:
2170 /*
2171 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2172 * In the internal DTD subset, parameter-entity references
2173 * can occur only where markup declarations can occur, not
2174 * within markup declarations.
2175 * In that case this is handled in xmlParseMarkupDecl
2176 */
2177 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2178 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002179 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002180 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002181 break;
2182 case XML_PARSER_IGNORE:
2183 return;
2184 }
2185
2186 NEXT;
2187 name = xmlParseName(ctxt);
2188 if (xmlParserDebugEntities)
2189 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002190 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002191 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002192 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002193 } else {
2194 if (RAW == ';') {
2195 NEXT;
2196 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2197 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2198 if (entity == NULL) {
2199
2200 /*
2201 * [ WFC: Entity Declared ]
2202 * In a document without any DTD, a document with only an
2203 * internal DTD subset which contains no parameter entity
2204 * references, or a document with "standalone='yes'", ...
2205 * ... The declaration of a parameter entity must precede
2206 * any reference to it...
2207 */
2208 if ((ctxt->standalone == 1) ||
2209 ((ctxt->hasExternalSubset == 0) &&
2210 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002211 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002212 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002213 } else {
2214 /*
2215 * [ VC: Entity Declared ]
2216 * In a document with an external subset or external
2217 * parameter entities with "standalone='no'", ...
2218 * ... The declaration of a parameter entity must precede
2219 * any reference to it...
2220 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002221 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2222 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2223 "PEReference: %%%s; not found\n",
2224 name);
2225 } else
2226 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2227 "PEReference: %%%s; not found\n",
2228 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002229 ctxt->valid = 0;
2230 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002231 } else if (ctxt->input->free != deallocblankswrapper) {
2232 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2233 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002234 } else {
2235 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2236 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002237 xmlChar start[4];
2238 xmlCharEncoding enc;
2239
Owen Taylor3473f882001-02-23 17:55:21 +00002240 /*
2241 * handle the extra spaces added before and after
2242 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002243 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002244 */
2245 input = xmlNewEntityInputStream(ctxt, entity);
2246 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002247
2248 /*
2249 * Get the 4 first bytes and decode the charset
2250 * if enc != XML_CHAR_ENCODING_NONE
2251 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002252 * Note that, since we may have some non-UTF8
2253 * encoding (like UTF16, bug 135229), the 'length'
2254 * is not known, but we can calculate based upon
2255 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002256 */
2257 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002258 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002259 start[0] = RAW;
2260 start[1] = NXT(1);
2261 start[2] = NXT(2);
2262 start[3] = NXT(3);
2263 enc = xmlDetectCharEncoding(start, 4);
2264 if (enc != XML_CHAR_ENCODING_NONE) {
2265 xmlSwitchEncoding(ctxt, enc);
2266 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002267 }
2268
Owen Taylor3473f882001-02-23 17:55:21 +00002269 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002270 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2271 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002272 xmlParseTextDecl(ctxt);
2273 }
Owen Taylor3473f882001-02-23 17:55:21 +00002274 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002275 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2276 "PEReference: %s is not a parameter entity\n",
2277 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002278 }
2279 }
2280 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002281 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002282 }
Owen Taylor3473f882001-02-23 17:55:21 +00002283 }
2284}
2285
/*
 * growBuffer(buffer): double the capacity of a heap xmlChar buffer.
 *
 * The calling function must provide a variable named <buffer>_size
 * holding the current capacity and a label named mem_error, which is the
 * jump target when the reallocation fails. In that case <buffer> still
 * points to the old block, while <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2297
2298/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002299 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002300 * @ctxt: the parser context
2301 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002302 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002303 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2304 * @end: an end marker xmlChar, 0 if none
2305 * @end2: an end marker xmlChar, 0 if none
2306 * @end3: an end marker xmlChar, 0 if none
2307 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002308 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002309 *
2310 * [67] Reference ::= EntityRef | CharRef
2311 *
2312 * [69] PEReference ::= '%' Name ';'
2313 *
2314 * Returns A newly allocated string with the substitution done. The caller
2315 * must deallocate it !
2316 */
2317xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002318xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 xmlChar *buffer = NULL;
2321 int buffer_size = 0;
2322
2323 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002324 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlEntityPtr ent;
2326 int c,l;
2327 int nbchars = 0;
2328
Daniel Veillarda82b1822004-11-08 16:24:57 +00002329 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002330 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002331 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002332
2333 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002334 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002335 return(NULL);
2336 }
2337
2338 /*
2339 * allocate a translation buffer.
2340 */
2341 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002342 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002343 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002344
2345 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002346 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002347 * we are operating on already parsed values.
2348 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002349 if (str < last)
2350 c = CUR_SCHAR(str, l);
2351 else
2352 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002353 while ((c != 0) && (c != end) && /* non input consuming loop */
2354 (c != end2) && (c != end3)) {
2355
2356 if (c == 0) break;
2357 if ((c == '&') && (str[1] == '#')) {
2358 int val = xmlParseStringCharRef(ctxt, &str);
2359 if (val != 0) {
2360 COPY_BUF(0,buffer,nbchars,val);
2361 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002362 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2363 growBuffer(buffer);
2364 }
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2366 if (xmlParserDebugEntities)
2367 xmlGenericError(xmlGenericErrorContext,
2368 "String decoding Entity Reference: %.30s\n",
2369 str);
2370 ent = xmlParseStringEntityRef(ctxt, &str);
2371 if ((ent != NULL) &&
2372 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2373 if (ent->content != NULL) {
2374 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002375 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2376 growBuffer(buffer);
2377 }
Owen Taylor3473f882001-02-23 17:55:21 +00002378 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002379 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2380 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002381 }
2382 } else if ((ent != NULL) && (ent->content != NULL)) {
2383 xmlChar *rep;
2384
2385 ctxt->depth++;
2386 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2387 0, 0, 0);
2388 ctxt->depth--;
2389 if (rep != NULL) {
2390 current = rep;
2391 while (*current != 0) { /* non input consuming loop */
2392 buffer[nbchars++] = *current++;
2393 if (nbchars >
2394 buffer_size - XML_PARSER_BUFFER_SIZE) {
2395 growBuffer(buffer);
2396 }
2397 }
2398 xmlFree(rep);
2399 }
2400 } else if (ent != NULL) {
2401 int i = xmlStrlen(ent->name);
2402 const xmlChar *cur = ent->name;
2403
2404 buffer[nbchars++] = '&';
2405 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2406 growBuffer(buffer);
2407 }
2408 for (;i > 0;i--)
2409 buffer[nbchars++] = *cur++;
2410 buffer[nbchars++] = ';';
2411 }
2412 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2413 if (xmlParserDebugEntities)
2414 xmlGenericError(xmlGenericErrorContext,
2415 "String decoding PE Reference: %.30s\n", str);
2416 ent = xmlParseStringPEReference(ctxt, &str);
2417 if (ent != NULL) {
2418 xmlChar *rep;
2419
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002420 if (ent->content == NULL) {
2421 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2422 }
2423 }
Owen Taylor3473f882001-02-23 17:55:21 +00002424 ctxt->depth++;
2425 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2426 0, 0, 0);
2427 ctxt->depth--;
2428 if (rep != NULL) {
2429 current = rep;
2430 while (*current != 0) { /* non input consuming loop */
2431 buffer[nbchars++] = *current++;
2432 if (nbchars >
2433 buffer_size - XML_PARSER_BUFFER_SIZE) {
2434 growBuffer(buffer);
2435 }
2436 }
2437 xmlFree(rep);
2438 }
2439 }
2440 } else {
2441 COPY_BUF(l,buffer,nbchars,c);
2442 str += l;
2443 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2444 growBuffer(buffer);
2445 }
2446 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002447 if (str < last)
2448 c = CUR_SCHAR(str, l);
2449 else
2450 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002451 }
2452 buffer[nbchars++] = 0;
2453 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002454
2455mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002456 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002457 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002458}
2459
Daniel Veillarde57ec792003-09-10 10:50:59 +00002460/**
2461 * xmlStringDecodeEntities:
2462 * @ctxt: the parser context
2463 * @str: the input string
2464 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2465 * @end: an end marker xmlChar, 0 if none
2466 * @end2: an end marker xmlChar, 0 if none
2467 * @end3: an end marker xmlChar, 0 if none
2468 *
2469 * Takes a entity string content and process to do the adequate substitutions.
2470 *
2471 * [67] Reference ::= EntityRef | CharRef
2472 *
2473 * [69] PEReference ::= '%' Name ';'
2474 *
2475 * Returns A newly allocated string with the substitution done. The caller
2476 * must deallocate it !
2477 */
2478xmlChar *
2479xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2480 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002481 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002482 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2483 end, end2, end3));
2484}
Owen Taylor3473f882001-02-23 17:55:21 +00002485
2486/************************************************************************
2487 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * Commodity functions, cleanup needed ? *
2489 * *
2490 ************************************************************************/
2491
2492/**
2493 * areBlanks:
2494 * @ctxt: an XML parser context
2495 * @str: a xmlChar *
2496 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002497 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002498 *
2499 * Is this a sequence of blank chars that one can ignore ?
2500 *
2501 * Returns 1 if ignorable 0 otherwise.
2502 */
2503
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002504static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2505 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002506 int i, ret;
2507 xmlNodePtr lastChild;
2508
Daniel Veillard05c13a22001-09-09 08:38:09 +00002509 /*
2510 * Don't spend time trying to differentiate them, the same callback is
2511 * used !
2512 */
2513 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002514 return(0);
2515
Owen Taylor3473f882001-02-23 17:55:21 +00002516 /*
2517 * Check for xml:space value.
2518 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002519 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2520 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002521 return(0);
2522
2523 /*
2524 * Check that the string is made of blanks
2525 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002526 if (blank_chars == 0) {
2527 for (i = 0;i < len;i++)
2528 if (!(IS_BLANK_CH(str[i]))) return(0);
2529 }
Owen Taylor3473f882001-02-23 17:55:21 +00002530
2531 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002532 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002533 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002534 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002535 if (ctxt->myDoc != NULL) {
2536 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2537 if (ret == 0) return(1);
2538 if (ret == 1) return(0);
2539 }
2540
2541 /*
2542 * Otherwise, heuristic :-\
2543 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002544 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 if ((ctxt->node->children == NULL) &&
2546 (RAW == '<') && (NXT(1) == '/')) return(0);
2547
2548 lastChild = xmlGetLastChild(ctxt->node);
2549 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002550 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2551 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 } else if (xmlNodeIsText(lastChild))
2553 return(0);
2554 else if ((ctxt->node->children != NULL) &&
2555 (xmlNodeIsText(ctxt->node->children)))
2556 return(0);
2557 return(1);
2558}
2559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/************************************************************************
2561 * *
2562 * Extra stuff for namespace support *
2563 * Relates to http://www.w3.org/TR/WD-xml-names *
2564 * *
2565 ************************************************************************/
2566
2567/**
2568 * xmlSplitQName:
2569 * @ctxt: an XML parser context
2570 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002571 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * parse an UTF8 encoded XML qualified name string
2574 *
2575 * [NS 5] QName ::= (Prefix ':')? LocalPart
2576 *
2577 * [NS 6] Prefix ::= NCName
2578 *
2579 * [NS 7] LocalPart ::= NCName
2580 *
2581 * Returns the local part, and prefix is updated
2582 * to get the Prefix if any.
2583 */
2584
2585xmlChar *
2586xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2587 xmlChar buf[XML_MAX_NAMELEN + 5];
2588 xmlChar *buffer = NULL;
2589 int len = 0;
2590 int max = XML_MAX_NAMELEN;
2591 xmlChar *ret = NULL;
2592 const xmlChar *cur = name;
2593 int c;
2594
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002595 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002596 *prefix = NULL;
2597
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002598 if (cur == NULL) return(NULL);
2599
Owen Taylor3473f882001-02-23 17:55:21 +00002600#ifndef XML_XML_NAMESPACE
2601 /* xml: prefix is not really a namespace */
2602 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2603 (cur[2] == 'l') && (cur[3] == ':'))
2604 return(xmlStrdup(name));
2605#endif
2606
Daniel Veillard597bc482003-07-24 16:08:28 +00002607 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002608 if (cur[0] == ':')
2609 return(xmlStrdup(name));
2610
2611 c = *cur++;
2612 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2613 buf[len++] = c;
2614 c = *cur++;
2615 }
2616 if (len >= max) {
2617 /*
2618 * Okay someone managed to make a huge name, so he's ready to pay
2619 * for the processing speed.
2620 */
2621 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002622
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002623 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002624 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002625 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002626 return(NULL);
2627 }
2628 memcpy(buffer, buf, len);
2629 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2630 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002631 xmlChar *tmp;
2632
Owen Taylor3473f882001-02-23 17:55:21 +00002633 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002634 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002635 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002636 if (tmp == NULL) {
2637 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002638 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002639 return(NULL);
2640 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002641 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002642 }
2643 buffer[len++] = c;
2644 c = *cur++;
2645 }
2646 buffer[len] = 0;
2647 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002648
Daniel Veillard597bc482003-07-24 16:08:28 +00002649 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002650 if (buffer != NULL)
2651 xmlFree(buffer);
2652 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002653 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002654 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002655
Owen Taylor3473f882001-02-23 17:55:21 +00002656 if (buffer == NULL)
2657 ret = xmlStrndup(buf, len);
2658 else {
2659 ret = buffer;
2660 buffer = NULL;
2661 max = XML_MAX_NAMELEN;
2662 }
2663
2664
2665 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002666 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002667 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002668 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002669 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002670 }
Owen Taylor3473f882001-02-23 17:55:21 +00002671 len = 0;
2672
Daniel Veillardbb284f42002-10-16 18:02:47 +00002673 /*
2674 * Check that the first character is proper to start
2675 * a new name
2676 */
2677 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2678 ((c >= 0x41) && (c <= 0x5A)) ||
2679 (c == '_') || (c == ':'))) {
2680 int l;
2681 int first = CUR_SCHAR(cur, l);
2682
2683 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002684 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002685 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002686 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002687 }
2688 }
2689 cur++;
2690
Owen Taylor3473f882001-02-23 17:55:21 +00002691 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2692 buf[len++] = c;
2693 c = *cur++;
2694 }
2695 if (len >= max) {
2696 /*
2697 * Okay someone managed to make a huge name, so he's ready to pay
2698 * for the processing speed.
2699 */
2700 max = len * 2;
2701
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002702 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002703 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002704 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 return(NULL);
2706 }
2707 memcpy(buffer, buf, len);
2708 while (c != 0) { /* tested bigname2.xml */
2709 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002710 xmlChar *tmp;
2711
Owen Taylor3473f882001-02-23 17:55:21 +00002712 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002713 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002714 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002715 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002716 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002717 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002718 return(NULL);
2719 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002720 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 }
2722 buffer[len++] = c;
2723 c = *cur++;
2724 }
2725 buffer[len] = 0;
2726 }
2727
2728 if (buffer == NULL)
2729 ret = xmlStrndup(buf, len);
2730 else {
2731 ret = buffer;
2732 }
2733 }
2734
2735 return(ret);
2736}
2737
2738/************************************************************************
2739 * *
2740 * The parser itself *
2741 * Relates to http://www.w3.org/TR/REC-xml *
2742 * *
2743 ************************************************************************/
2744
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002745static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002746static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002747 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002748
Owen Taylor3473f882001-02-23 17:55:21 +00002749/**
2750 * xmlParseName:
2751 * @ctxt: an XML parser context
2752 *
2753 * parse an XML name.
2754 *
2755 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2756 * CombiningChar | Extender
2757 *
2758 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2759 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002760 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002761 *
2762 * Returns the Name parsed or NULL
2763 */
2764
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002765const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002766xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002767 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002768 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002769 int count = 0;
2770
2771 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772
2773 /*
2774 * Accelerator for simple ASCII names
2775 */
2776 in = ctxt->input->cur;
2777 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2778 ((*in >= 0x41) && (*in <= 0x5A)) ||
2779 (*in == '_') || (*in == ':')) {
2780 in++;
2781 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2782 ((*in >= 0x41) && (*in <= 0x5A)) ||
2783 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002784 (*in == '_') || (*in == '-') ||
2785 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002786 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002787 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002788 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002789 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002790 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002791 ctxt->nbChars += count;
2792 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002793 if (ret == NULL)
2794 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002795 return(ret);
2796 }
2797 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002798 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002799}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002800
Daniel Veillard46de64e2002-05-29 08:21:33 +00002801/**
2802 * xmlParseNameAndCompare:
2803 * @ctxt: an XML parser context
2804 *
2805 * parse an XML name and compares for match
2806 * (specialized for endtag parsing)
2807 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002808 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2809 * and the name for mismatch
2810 */
2811
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002812static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002813xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002814 register const xmlChar *cmp = other;
2815 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002816 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002817
2818 GROW;
2819
2820 in = ctxt->input->cur;
2821 while (*in != 0 && *in == *cmp) {
2822 ++in;
2823 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002824 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002825 }
William M. Brack76e95df2003-10-18 16:20:14 +00002826 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002827 /* success */
2828 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002829 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002830 }
2831 /* failure (or end of input buffer), check with full function */
2832 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002833 /* strings coming from the dictionnary direct compare possible */
2834 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002835 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002836 }
2837 return ret;
2838}
2839
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002840static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002841xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002842 int len = 0, l;
2843 int c;
2844 int count = 0;
2845
2846 /*
2847 * Handler for more complex cases
2848 */
2849 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 c = CUR_CHAR(l);
2851 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2852 (!IS_LETTER(c) && (c != '_') &&
2853 (c != ':'))) {
2854 return(NULL);
2855 }
2856
2857 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002858 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002859 (c == '.') || (c == '-') ||
2860 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002861 (IS_COMBINING(c)) ||
2862 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002863 if (count++ > 100) {
2864 count = 0;
2865 GROW;
2866 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002867 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002868 NEXTL(l);
2869 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002870 }
Daniel Veillard96688262005-08-23 18:14:12 +00002871 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2872 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002873 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002874}
2875
2876/**
2877 * xmlParseStringName:
2878 * @ctxt: an XML parser context
2879 * @str: a pointer to the string pointer (IN/OUT)
2880 *
2881 * parse an XML name.
2882 *
2883 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2884 * CombiningChar | Extender
2885 *
2886 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2887 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002888 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002889 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002890 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002891 * is updated to the current location in the string.
2892 */
2893
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002894static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002895xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2896 xmlChar buf[XML_MAX_NAMELEN + 5];
2897 const xmlChar *cur = *str;
2898 int len = 0, l;
2899 int c;
2900
2901 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002902 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002903 (c != ':')) {
2904 return(NULL);
2905 }
2906
William M. Brack871611b2003-10-18 04:53:14 +00002907 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002908 (c == '.') || (c == '-') ||
2909 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002910 (IS_COMBINING(c)) ||
2911 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002912 COPY_BUF(l,buf,len,c);
2913 cur += l;
2914 c = CUR_SCHAR(cur, l);
2915 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2916 /*
2917 * Okay someone managed to make a huge name, so he's ready to pay
2918 * for the processing speed.
2919 */
2920 xmlChar *buffer;
2921 int max = len * 2;
2922
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002923 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002924 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002925 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002926 return(NULL);
2927 }
2928 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002929 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002930 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002931 (c == '.') || (c == '-') ||
2932 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002933 (IS_COMBINING(c)) ||
2934 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002935 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002936 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002937 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002938 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002939 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002940 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002941 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002942 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002943 return(NULL);
2944 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002945 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002946 }
2947 COPY_BUF(l,buffer,len,c);
2948 cur += l;
2949 c = CUR_SCHAR(cur, l);
2950 }
2951 buffer[len] = 0;
2952 *str = cur;
2953 return(buffer);
2954 }
2955 }
2956 *str = cur;
2957 return(xmlStrndup(buf, len));
2958}
2959
2960/**
2961 * xmlParseNmtoken:
2962 * @ctxt: an XML parser context
2963 *
2964 * parse an XML Nmtoken.
2965 *
2966 * [7] Nmtoken ::= (NameChar)+
2967 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002968 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002969 *
2970 * Returns the Nmtoken parsed or NULL
2971 */
2972
2973xmlChar *
2974xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2975 xmlChar buf[XML_MAX_NAMELEN + 5];
2976 int len = 0, l;
2977 int c;
2978 int count = 0;
2979
2980 GROW;
2981 c = CUR_CHAR(l);
2982
William M. Brack871611b2003-10-18 04:53:14 +00002983 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002984 (c == '.') || (c == '-') ||
2985 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002986 (IS_COMBINING(c)) ||
2987 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002988 if (count++ > 100) {
2989 count = 0;
2990 GROW;
2991 }
2992 COPY_BUF(l,buf,len,c);
2993 NEXTL(l);
2994 c = CUR_CHAR(l);
2995 if (len >= XML_MAX_NAMELEN) {
2996 /*
2997 * Okay someone managed to make a huge token, so he's ready to pay
2998 * for the processing speed.
2999 */
3000 xmlChar *buffer;
3001 int max = len * 2;
3002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003009 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003010 (c == '.') || (c == '-') ||
3011 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003012 (IS_COMBINING(c)) ||
3013 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003014 if (count++ > 100) {
3015 count = 0;
3016 GROW;
3017 }
3018 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 xmlChar *tmp;
3020
Owen Taylor3473f882001-02-23 17:55:21 +00003021 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003022 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003023 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003024 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003025 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003026 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003029 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003030 }
3031 COPY_BUF(l,buffer,len,c);
3032 NEXTL(l);
3033 c = CUR_CHAR(l);
3034 }
3035 buffer[len] = 0;
3036 return(buffer);
3037 }
3038 }
3039 if (len == 0)
3040 return(NULL);
3041 return(xmlStrndup(buf, len));
3042}
3043
3044/**
3045 * xmlParseEntityValue:
3046 * @ctxt: an XML parser context
3047 * @orig: if non-NULL store a copy of the original entity value
3048 *
3049 * parse a value for ENTITY declarations
3050 *
3051 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3052 * "'" ([^%&'] | PEReference | Reference)* "'"
3053 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003054 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003055 */
3056
3057xmlChar *
3058xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3059 xmlChar *buf = NULL;
3060 int len = 0;
3061 int size = XML_PARSER_BUFFER_SIZE;
3062 int c, l;
3063 xmlChar stop;
3064 xmlChar *ret = NULL;
3065 const xmlChar *cur = NULL;
3066 xmlParserInputPtr input;
3067
3068 if (RAW == '"') stop = '"';
3069 else if (RAW == '\'') stop = '\'';
3070 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003071 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003072 return(NULL);
3073 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003074 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003075 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003076 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003077 return(NULL);
3078 }
3079
3080 /*
3081 * The content of the entity definition is copied in a buffer.
3082 */
3083
3084 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3085 input = ctxt->input;
3086 GROW;
3087 NEXT;
3088 c = CUR_CHAR(l);
3089 /*
3090 * NOTE: 4.4.5 Included in Literal
3091 * When a parameter entity reference appears in a literal entity
3092 * value, ... a single or double quote character in the replacement
3093 * text is always treated as a normal data character and will not
3094 * terminate the literal.
3095 * In practice it means we stop the loop only when back at parsing
3096 * the initial entity and the quote is found
3097 */
William M. Brack871611b2003-10-18 04:53:14 +00003098 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003099 (ctxt->input != input))) {
3100 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003101 xmlChar *tmp;
3102
Owen Taylor3473f882001-02-23 17:55:21 +00003103 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003104 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3105 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003106 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003107 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003108 return(NULL);
3109 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003110 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003111 }
3112 COPY_BUF(l,buf,len,c);
3113 NEXTL(l);
3114 /*
3115 * Pop-up of finished entities.
3116 */
3117 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3118 xmlPopInput(ctxt);
3119
3120 GROW;
3121 c = CUR_CHAR(l);
3122 if (c == 0) {
3123 GROW;
3124 c = CUR_CHAR(l);
3125 }
3126 }
3127 buf[len] = 0;
3128
3129 /*
3130 * Raise problem w.r.t. '&' and '%' being used in non-entities
3131 * reference constructs. Note Charref will be handled in
3132 * xmlStringDecodeEntities()
3133 */
3134 cur = buf;
3135 while (*cur != 0) { /* non input consuming */
3136 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3137 xmlChar *name;
3138 xmlChar tmp = *cur;
3139
3140 cur++;
3141 name = xmlParseStringName(ctxt, &cur);
3142 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003143 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003144 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003145 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003146 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003147 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3148 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003149 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003150 }
3151 if (name != NULL)
3152 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003153 if (*cur == 0)
3154 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003155 }
3156 cur++;
3157 }
3158
3159 /*
3160 * Then PEReference entities are substituted.
3161 */
3162 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003163 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003164 xmlFree(buf);
3165 } else {
3166 NEXT;
3167 /*
3168 * NOTE: 4.4.7 Bypassed
3169 * When a general entity reference appears in the EntityValue in
3170 * an entity declaration, it is bypassed and left as is.
3171 * so XML_SUBSTITUTE_REF is not set here.
3172 */
3173 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3174 0, 0, 0);
3175 if (orig != NULL)
3176 *orig = buf;
3177 else
3178 xmlFree(buf);
3179 }
3180
3181 return(ret);
3182}
3183
3184/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003185 * xmlParseAttValueComplex:
3186 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003187 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003188 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003189 *
3190 * parse a value for an attribute, this is the fallback function
3191 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003192 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003193 *
3194 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3195 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003196static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003197xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003198 xmlChar limit = 0;
3199 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003200 int len = 0;
3201 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003202 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003203 xmlChar *current = NULL;
3204 xmlEntityPtr ent;
3205
Owen Taylor3473f882001-02-23 17:55:21 +00003206 if (NXT(0) == '"') {
3207 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3208 limit = '"';
3209 NEXT;
3210 } else if (NXT(0) == '\'') {
3211 limit = '\'';
3212 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3213 NEXT;
3214 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003215 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(NULL);
3217 }
3218
3219 /*
3220 * allocate a translation buffer.
3221 */
3222 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003223 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003224 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003225
3226 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003227 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003228 */
3229 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003230 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003231 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003232 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003233 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003234 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003235 if (NXT(1) == '#') {
3236 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003237
Owen Taylor3473f882001-02-23 17:55:21 +00003238 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003239 if (ctxt->replaceEntities) {
3240 if (len > buf_size - 10) {
3241 growBuffer(buf);
3242 }
3243 buf[len++] = '&';
3244 } else {
3245 /*
3246 * The reparsing will be done in xmlStringGetNodeList()
3247 * called by the attribute() function in SAX.c
3248 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003249 if (len > buf_size - 10) {
3250 growBuffer(buf);
3251 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003252 buf[len++] = '&';
3253 buf[len++] = '#';
3254 buf[len++] = '3';
3255 buf[len++] = '8';
3256 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003257 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003258 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003259 if (len > buf_size - 10) {
3260 growBuffer(buf);
3261 }
Owen Taylor3473f882001-02-23 17:55:21 +00003262 len += xmlCopyChar(0, &buf[len], val);
3263 }
3264 } else {
3265 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003266 if ((ent != NULL) &&
3267 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3268 if (len > buf_size - 10) {
3269 growBuffer(buf);
3270 }
3271 if ((ctxt->replaceEntities == 0) &&
3272 (ent->content[0] == '&')) {
3273 buf[len++] = '&';
3274 buf[len++] = '#';
3275 buf[len++] = '3';
3276 buf[len++] = '8';
3277 buf[len++] = ';';
3278 } else {
3279 buf[len++] = ent->content[0];
3280 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003281 } else if ((ent != NULL) &&
3282 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003283 xmlChar *rep;
3284
3285 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3286 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003287 XML_SUBSTITUTE_REF,
3288 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003289 if (rep != NULL) {
3290 current = rep;
3291 while (*current != 0) { /* non input consuming */
3292 buf[len++] = *current++;
3293 if (len > buf_size - 10) {
3294 growBuffer(buf);
3295 }
3296 }
3297 xmlFree(rep);
3298 }
3299 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003300 if (len > buf_size - 10) {
3301 growBuffer(buf);
3302 }
Owen Taylor3473f882001-02-23 17:55:21 +00003303 if (ent->content != NULL)
3304 buf[len++] = ent->content[0];
3305 }
3306 } else if (ent != NULL) {
3307 int i = xmlStrlen(ent->name);
3308 const xmlChar *cur = ent->name;
3309
3310 /*
3311 * This may look absurd but is needed to detect
3312 * entities problems
3313 */
3314 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3315 (ent->content != NULL)) {
3316 xmlChar *rep;
3317 rep = xmlStringDecodeEntities(ctxt, ent->content,
3318 XML_SUBSTITUTE_REF, 0, 0, 0);
3319 if (rep != NULL)
3320 xmlFree(rep);
3321 }
3322
3323 /*
3324 * Just output the reference
3325 */
3326 buf[len++] = '&';
3327 if (len > buf_size - i - 10) {
3328 growBuffer(buf);
3329 }
3330 for (;i > 0;i--)
3331 buf[len++] = *cur++;
3332 buf[len++] = ';';
3333 }
3334 }
3335 } else {
3336 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003337 if ((len != 0) || (!normalize)) {
3338 if ((!normalize) || (!in_space)) {
3339 COPY_BUF(l,buf,len,0x20);
3340 if (len > buf_size - 10) {
3341 growBuffer(buf);
3342 }
3343 }
3344 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003345 }
3346 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003347 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003348 COPY_BUF(l,buf,len,c);
3349 if (len > buf_size - 10) {
3350 growBuffer(buf);
3351 }
3352 }
3353 NEXTL(l);
3354 }
3355 GROW;
3356 c = CUR_CHAR(l);
3357 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003358 if ((in_space) && (normalize)) {
3359 while (buf[len - 1] == 0x20) len--;
3360 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003361 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003363 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003364 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003365 if ((c != 0) && (!IS_CHAR(c))) {
3366 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3367 "invalid character in attribute value\n");
3368 } else {
3369 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3370 "AttValue: ' expected\n");
3371 }
Owen Taylor3473f882001-02-23 17:55:21 +00003372 } else
3373 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003374 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003375 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003376
3377mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003378 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003379 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003380}
3381
3382/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003383 * xmlParseAttValue:
3384 * @ctxt: an XML parser context
3385 *
3386 * parse a value for an attribute
3387 * Note: the parser won't do substitution of entities here, this
3388 * will be handled later in xmlStringGetNodeList
3389 *
3390 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3391 * "'" ([^<&'] | Reference)* "'"
3392 *
3393 * 3.3.3 Attribute-Value Normalization:
3394 * Before the value of an attribute is passed to the application or
3395 * checked for validity, the XML processor must normalize it as follows:
3396 * - a character reference is processed by appending the referenced
3397 * character to the attribute value
3398 * - an entity reference is processed by recursively processing the
3399 * replacement text of the entity
3400 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3401 * appending #x20 to the normalized value, except that only a single
3402 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3403 * parsed entity or the literal entity value of an internal parsed entity
3404 * - other characters are processed by appending them to the normalized value
3405 * If the declared value is not CDATA, then the XML processor must further
3406 * process the normalized attribute value by discarding any leading and
3407 * trailing space (#x20) characters, and by replacing sequences of space
3408 * (#x20) characters by a single space (#x20) character.
3409 * All attributes for which no declaration has been read should be treated
3410 * by a non-validating parser as if declared CDATA.
3411 *
3412 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3413 */
3414
3415
3416xmlChar *
3417xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003418 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003419 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003420}
3421
3422/**
Owen Taylor3473f882001-02-23 17:55:21 +00003423 * xmlParseSystemLiteral:
3424 * @ctxt: an XML parser context
3425 *
3426 * parse an XML Literal
3427 *
3428 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3429 *
3430 * Returns the SystemLiteral parsed or NULL
3431 */
3432
3433xmlChar *
3434xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3435 xmlChar *buf = NULL;
3436 int len = 0;
3437 int size = XML_PARSER_BUFFER_SIZE;
3438 int cur, l;
3439 xmlChar stop;
3440 int state = ctxt->instate;
3441 int count = 0;
3442
3443 SHRINK;
3444 if (RAW == '"') {
3445 NEXT;
3446 stop = '"';
3447 } else if (RAW == '\'') {
3448 NEXT;
3449 stop = '\'';
3450 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003451 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003452 return(NULL);
3453 }
3454
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003455 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003456 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003457 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003458 return(NULL);
3459 }
3460 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3461 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003462 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003463 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003464 xmlChar *tmp;
3465
Owen Taylor3473f882001-02-23 17:55:21 +00003466 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003467 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3468 if (tmp == NULL) {
3469 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003470 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 ctxt->instate = (xmlParserInputState) state;
3472 return(NULL);
3473 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003474 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 }
3476 count++;
3477 if (count > 50) {
3478 GROW;
3479 count = 0;
3480 }
3481 COPY_BUF(l,buf,len,cur);
3482 NEXTL(l);
3483 cur = CUR_CHAR(l);
3484 if (cur == 0) {
3485 GROW;
3486 SHRINK;
3487 cur = CUR_CHAR(l);
3488 }
3489 }
3490 buf[len] = 0;
3491 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003492 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003493 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003494 } else {
3495 NEXT;
3496 }
3497 return(buf);
3498}
3499
3500/**
3501 * xmlParsePubidLiteral:
3502 * @ctxt: an XML parser context
3503 *
3504 * parse an XML public literal
3505 *
3506 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3507 *
3508 * Returns the PubidLiteral parsed or NULL.
3509 */
3510
3511xmlChar *
3512xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3513 xmlChar *buf = NULL;
3514 int len = 0;
3515 int size = XML_PARSER_BUFFER_SIZE;
3516 xmlChar cur;
3517 xmlChar stop;
3518 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003519 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003520
3521 SHRINK;
3522 if (RAW == '"') {
3523 NEXT;
3524 stop = '"';
3525 } else if (RAW == '\'') {
3526 NEXT;
3527 stop = '\'';
3528 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003529 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003530 return(NULL);
3531 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003532 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003533 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003534 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003535 return(NULL);
3536 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003537 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003538 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003539 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003540 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003541 xmlChar *tmp;
3542
Owen Taylor3473f882001-02-23 17:55:21 +00003543 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003544 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3545 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003546 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003547 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003548 return(NULL);
3549 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003550 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003551 }
3552 buf[len++] = cur;
3553 count++;
3554 if (count > 50) {
3555 GROW;
3556 count = 0;
3557 }
3558 NEXT;
3559 cur = CUR;
3560 if (cur == 0) {
3561 GROW;
3562 SHRINK;
3563 cur = CUR;
3564 }
3565 }
3566 buf[len] = 0;
3567 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003568 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 } else {
3570 NEXT;
3571 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003572 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003573 return(buf);
3574}
3575
Daniel Veillard48b2f892001-02-25 16:11:03 +00003576void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003577
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. It is indexed by byte value: a non-zero entry (the byte itself)
 * lets the inner loop of xmlParseCharData() advance, a zero entry stops it.
 * Zero entries are the control characters other than TAB (LF and CR
 * included), '&', '<', ']' and every byte above 0x7F.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3615
Owen Taylor3473f882001-02-23 17:55:21 +00003616/**
3617 * xmlParseCharData:
3618 * @ctxt: an XML parser context
3619 * @cdata: int indicating whether we are within a CDATA section
3620 *
3621 * parse a CharData section.
3622 * if we are within a CDATA section ']]>' marks an end of section.
3623 *
3624 * The right angle bracket (>) may be represented using the string "&gt;",
3625 * and must, for compatibility, be escaped using "&gt;" or a character
3626 * reference when it appears in the string "]]>" in content, when that
3627 * string is not marking the end of a CDATA section.
3628 *
3629 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3630 */
3631
3632void
3633xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003634 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003635 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003636 int line = ctxt->input->line;
3637 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003638 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003639
3640 SHRINK;
3641 GROW;
3642 /*
3643 * Accelerated common case where input don't need to be
3644 * modified before passing it to the handler.
3645 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003646 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003647 in = ctxt->input->cur;
3648 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003649get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003650 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003651 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003652 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003653 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003654 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003655 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003656 goto get_more_space;
3657 }
3658 if (*in == '<') {
3659 nbchar = in - ctxt->input->cur;
3660 if (nbchar > 0) {
3661 const xmlChar *tmp = ctxt->input->cur;
3662 ctxt->input->cur = in;
3663
Daniel Veillard34099b42004-11-04 17:34:35 +00003664 if ((ctxt->sax != NULL) &&
3665 (ctxt->sax->ignorableWhitespace !=
3666 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003667 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003668 if (ctxt->sax->ignorableWhitespace != NULL)
3669 ctxt->sax->ignorableWhitespace(ctxt->userData,
3670 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003671 } else {
3672 if (ctxt->sax->characters != NULL)
3673 ctxt->sax->characters(ctxt->userData,
3674 tmp, nbchar);
3675 if (*ctxt->space == -1)
3676 *ctxt->space = -2;
3677 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003678 } else if ((ctxt->sax != NULL) &&
3679 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003680 ctxt->sax->characters(ctxt->userData,
3681 tmp, nbchar);
3682 }
3683 }
3684 return;
3685 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003686
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003687get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003688 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003689 while (test_char_data[*in]) {
3690 in++;
3691 ccol++;
3692 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003693 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003694 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003695 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003696 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003697 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003698 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003699 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003700 }
3701 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003702 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003703 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003704 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003705 return;
3706 }
3707 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003708 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003709 goto get_more;
3710 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003711 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003712 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003713 if ((ctxt->sax != NULL) &&
3714 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003715 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003716 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003717 const xmlChar *tmp = ctxt->input->cur;
3718 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003719
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003720 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003721 if (ctxt->sax->ignorableWhitespace != NULL)
3722 ctxt->sax->ignorableWhitespace(ctxt->userData,
3723 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003724 } else {
3725 if (ctxt->sax->characters != NULL)
3726 ctxt->sax->characters(ctxt->userData,
3727 tmp, nbchar);
3728 if (*ctxt->space == -1)
3729 *ctxt->space = -2;
3730 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003731 line = ctxt->input->line;
3732 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003733 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003734 if (ctxt->sax->characters != NULL)
3735 ctxt->sax->characters(ctxt->userData,
3736 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003737 line = ctxt->input->line;
3738 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003739 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003740 }
3741 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003742 if (*in == 0xD) {
3743 in++;
3744 if (*in == 0xA) {
3745 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003746 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003747 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003748 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003749 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003750 in--;
3751 }
3752 if (*in == '<') {
3753 return;
3754 }
3755 if (*in == '&') {
3756 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003757 }
3758 SHRINK;
3759 GROW;
3760 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003761 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003762 nbchar = 0;
3763 }
Daniel Veillard50582112001-03-26 22:52:16 +00003764 ctxt->input->line = line;
3765 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003766 xmlParseCharDataComplex(ctxt, cdata);
3767}
3768
Daniel Veillard01c13b52002-12-10 15:19:08 +00003769/**
3770 * xmlParseCharDataComplex:
3771 * @ctxt: an XML parser context
3772 * @cdata: int indicating whether we are within a CDATA section
3773 *
3774 * parse a CharData section.this is the fallback function
3775 * of xmlParseCharData() when the parsing requires handling
3776 * of non-ASCII characters.
3777 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003778void
3779xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003780 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3781 int nbchar = 0;
3782 int cur, l;
3783 int count = 0;
3784
3785 SHRINK;
3786 GROW;
3787 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003788 while ((cur != '<') && /* checked */
3789 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003790 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003791 if ((cur == ']') && (NXT(1) == ']') &&
3792 (NXT(2) == '>')) {
3793 if (cdata) break;
3794 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003795 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003796 }
3797 }
3798 COPY_BUF(l,buf,nbchar,cur);
3799 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003800 buf[nbchar] = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003803 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003804 */
3805 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003806 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003807 if (ctxt->sax->ignorableWhitespace != NULL)
3808 ctxt->sax->ignorableWhitespace(ctxt->userData,
3809 buf, nbchar);
3810 } else {
3811 if (ctxt->sax->characters != NULL)
3812 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003813 if ((ctxt->sax->characters !=
3814 ctxt->sax->ignorableWhitespace) &&
3815 (*ctxt->space == -1))
3816 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003817 }
3818 }
3819 nbchar = 0;
3820 }
3821 count++;
3822 if (count > 50) {
3823 GROW;
3824 count = 0;
3825 }
3826 NEXTL(l);
3827 cur = CUR_CHAR(l);
3828 }
3829 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003830 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003832 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003833 */
3834 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003835 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003836 if (ctxt->sax->ignorableWhitespace != NULL)
3837 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3838 } else {
3839 if (ctxt->sax->characters != NULL)
3840 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003841 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3842 (*ctxt->space == -1))
3843 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003844 }
3845 }
3846 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003847 if ((cur != 0) && (!IS_CHAR(cur))) {
3848 /* Generate the error and skip the offending character */
3849 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3850 "PCDATA invalid Char value %d\n",
3851 cur);
3852 NEXTL(l);
3853 }
Owen Taylor3473f882001-02-23 17:55:21 +00003854}
3855
3856/**
3857 * xmlParseExternalID:
3858 * @ctxt: an XML parser context
3859 * @publicID: a xmlChar** receiving PubidLiteral
3860 * @strict: indicate whether we should restrict parsing to only
3861 * production [75], see NOTE below
3862 *
3863 * Parse an External ID or a Public ID
3864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003865 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003866 * 'PUBLIC' S PubidLiteral S SystemLiteral
3867 *
3868 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3869 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3870 *
3871 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3872 *
 * Returns the SystemLiteral. For the 'PUBLIC' form, @publicID additionally
 * receives the PubidLiteral; if @strict is off
 * it is possible to return NULL and have @publicID set.
3876 */
3877
3878xmlChar *
3879xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3880 xmlChar *URI = NULL;
3881
3882 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003883
3884 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003885 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003886 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003887 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003888 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3889 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003890 }
3891 SKIP_BLANKS;
3892 URI = xmlParseSystemLiteral(ctxt);
3893 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003894 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003895 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003896 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003897 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003898 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003899 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003900 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003901 }
3902 SKIP_BLANKS;
3903 *publicID = xmlParsePubidLiteral(ctxt);
3904 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003905 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003906 }
3907 if (strict) {
3908 /*
3909 * We don't handle [83] so "S SystemLiteral" is required.
3910 */
William M. Brack76e95df2003-10-18 16:20:14 +00003911 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003912 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003913 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003914 }
3915 } else {
3916 /*
3917 * We handle [83] so we return immediately, if
3918 * "S SystemLiteral" is not detected. From a purely parsing
3919 * point of view that's a nice mess.
3920 */
3921 const xmlChar *ptr;
3922 GROW;
3923
3924 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003925 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003926
William M. Brack76e95df2003-10-18 16:20:14 +00003927 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003928 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3929 }
3930 SKIP_BLANKS;
3931 URI = xmlParseSystemLiteral(ctxt);
3932 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003933 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003934 }
3935 }
3936 return(URI);
3937}
3938
3939/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003940 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003941 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003942 * @buf: the already parsed part of the buffer
3943 * @len: number of bytes filles in the buffer
3944 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003945 *
3946 * Skip an XML (SGML) comment <!-- .... -->
3947 * The spec says that "For compatibility, the string "--" (double-hyphen)
3948 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003949 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003950 *
3951 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3952 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003953static void
3954xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003955 int q, ql;
3956 int r, rl;
3957 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003958 xmlParserInputPtr input = ctxt->input;
3959 int count = 0;
3960
Owen Taylor3473f882001-02-23 17:55:21 +00003961 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003962 len = 0;
3963 size = XML_PARSER_BUFFER_SIZE;
3964 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3965 if (buf == NULL) {
3966 xmlErrMemory(ctxt, NULL);
3967 return;
3968 }
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003970 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003971 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003972 if (q == 0)
3973 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003974 if (!IS_CHAR(q)) {
3975 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3976 "xmlParseComment: invalid xmlChar value %d\n",
3977 q);
3978 xmlFree (buf);
3979 return;
3980 }
Owen Taylor3473f882001-02-23 17:55:21 +00003981 NEXTL(ql);
3982 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003983 if (r == 0)
3984 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003985 if (!IS_CHAR(r)) {
3986 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3987 "xmlParseComment: invalid xmlChar value %d\n",
3988 q);
3989 xmlFree (buf);
3990 return;
3991 }
Owen Taylor3473f882001-02-23 17:55:21 +00003992 NEXTL(rl);
3993 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003994 if (cur == 0)
3995 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003996 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003997 ((cur != '>') ||
3998 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003999 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004000 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004001 }
4002 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004003 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004004 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004005 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4006 if (new_buf == NULL) {
4007 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004008 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004009 return;
4010 }
William M. Bracka3215c72004-07-31 16:24:01 +00004011 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004012 }
4013 COPY_BUF(ql,buf,len,q);
4014 q = r;
4015 ql = rl;
4016 r = cur;
4017 rl = l;
4018
4019 count++;
4020 if (count > 50) {
4021 GROW;
4022 count = 0;
4023 }
4024 NEXTL(l);
4025 cur = CUR_CHAR(l);
4026 if (cur == 0) {
4027 SHRINK;
4028 GROW;
4029 cur = CUR_CHAR(l);
4030 }
4031 }
4032 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004033 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004034 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004035 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004036 } else if (!IS_CHAR(cur)) {
4037 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4038 "xmlParseComment: invalid xmlChar value %d\n",
4039 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004040 } else {
4041 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004042 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4043 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045 NEXT;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4047 (!ctxt->disableSAX))
4048 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 }
Daniel Veillardda629342007-08-01 07:49:06 +00004050 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004051 return;
4052not_terminated:
4053 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4054 "Comment not terminated\n", NULL);
4055 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004056 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004057}
Daniel Veillardda629342007-08-01 07:49:06 +00004058
Daniel Veillard4c778d82005-01-23 17:37:44 +00004059/**
4060 * xmlParseComment:
4061 * @ctxt: an XML parser context
4062 *
4063 * Skip an XML (SGML) comment <!-- .... -->
4064 * The spec says that "For compatibility, the string "--" (double-hyphen)
4065 * must not occur within comments. "
4066 *
4067 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4068 */
4069void
4070xmlParseComment(xmlParserCtxtPtr ctxt) {
4071 xmlChar *buf = NULL;
4072 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004073 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004074 xmlParserInputState state;
4075 const xmlChar *in;
4076 int nbchar = 0, ccol;
4077
4078 /*
4079 * Check that there is a comment right here.
4080 */
4081 if ((RAW != '<') || (NXT(1) != '!') ||
4082 (NXT(2) != '-') || (NXT(3) != '-')) return;
4083
4084 state = ctxt->instate;
4085 ctxt->instate = XML_PARSER_COMMENT;
4086 SKIP(4);
4087 SHRINK;
4088 GROW;
4089
4090 /*
4091 * Accelerated common case where input don't need to be
4092 * modified before passing it to the handler.
4093 */
4094 in = ctxt->input->cur;
4095 do {
4096 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004097 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004098 ctxt->input->line++; ctxt->input->col = 1;
4099 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004100 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004101 }
4102get_more:
4103 ccol = ctxt->input->col;
4104 while (((*in > '-') && (*in <= 0x7F)) ||
4105 ((*in >= 0x20) && (*in < '-')) ||
4106 (*in == 0x09)) {
4107 in++;
4108 ccol++;
4109 }
4110 ctxt->input->col = ccol;
4111 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004112 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004113 ctxt->input->line++; ctxt->input->col = 1;
4114 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004115 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004116 goto get_more;
4117 }
4118 nbchar = in - ctxt->input->cur;
4119 /*
4120 * save current set of data
4121 */
4122 if (nbchar > 0) {
4123 if ((ctxt->sax != NULL) &&
4124 (ctxt->sax->comment != NULL)) {
4125 if (buf == NULL) {
4126 if ((*in == '-') && (in[1] == '-'))
4127 size = nbchar + 1;
4128 else
4129 size = XML_PARSER_BUFFER_SIZE + nbchar;
4130 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4131 if (buf == NULL) {
4132 xmlErrMemory(ctxt, NULL);
4133 ctxt->instate = state;
4134 return;
4135 }
4136 len = 0;
4137 } else if (len + nbchar + 1 >= size) {
4138 xmlChar *new_buf;
4139 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4140 new_buf = (xmlChar *) xmlRealloc(buf,
4141 size * sizeof(xmlChar));
4142 if (new_buf == NULL) {
4143 xmlFree (buf);
4144 xmlErrMemory(ctxt, NULL);
4145 ctxt->instate = state;
4146 return;
4147 }
4148 buf = new_buf;
4149 }
4150 memcpy(&buf[len], ctxt->input->cur, nbchar);
4151 len += nbchar;
4152 buf[len] = 0;
4153 }
4154 }
4155 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004156 if (*in == 0xA) {
4157 in++;
4158 ctxt->input->line++; ctxt->input->col = 1;
4159 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004160 if (*in == 0xD) {
4161 in++;
4162 if (*in == 0xA) {
4163 ctxt->input->cur = in;
4164 in++;
4165 ctxt->input->line++; ctxt->input->col = 1;
4166 continue; /* while */
4167 }
4168 in--;
4169 }
4170 SHRINK;
4171 GROW;
4172 in = ctxt->input->cur;
4173 if (*in == '-') {
4174 if (in[1] == '-') {
4175 if (in[2] == '>') {
4176 SKIP(3);
4177 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4178 (!ctxt->disableSAX)) {
4179 if (buf != NULL)
4180 ctxt->sax->comment(ctxt->userData, buf);
4181 else
4182 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4183 }
4184 if (buf != NULL)
4185 xmlFree(buf);
4186 ctxt->instate = state;
4187 return;
4188 }
4189 if (buf != NULL)
4190 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4191 "Comment not terminated \n<!--%.50s\n",
4192 buf);
4193 else
4194 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4195 "Comment not terminated \n", NULL);
4196 in++;
4197 ctxt->input->col++;
4198 }
4199 in++;
4200 ctxt->input->col++;
4201 goto get_more;
4202 }
4203 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4204 xmlParseCommentComplex(ctxt, buf, len, size);
4205 ctxt->instate = state;
4206 return;
4207}
4208
Owen Taylor3473f882001-02-23 17:55:21 +00004209
4210/**
4211 * xmlParsePITarget:
4212 * @ctxt: an XML parser context
4213 *
4214 * parse the name of a PI
4215 *
4216 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4217 *
4218 * Returns the PITarget name or NULL
4219 */
4220
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004221const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004222xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004223 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004224
4225 name = xmlParseName(ctxt);
4226 if ((name != NULL) &&
4227 ((name[0] == 'x') || (name[0] == 'X')) &&
4228 ((name[1] == 'm') || (name[1] == 'M')) &&
4229 ((name[2] == 'l') || (name[2] == 'L'))) {
4230 int i;
4231 if ((name[0] == 'x') && (name[1] == 'm') &&
4232 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004233 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004234 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004235 return(name);
4236 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004237 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 return(name);
4239 }
4240 for (i = 0;;i++) {
4241 if (xmlW3CPIs[i] == NULL) break;
4242 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4243 return(name);
4244 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004245 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4246 "xmlParsePITarget: invalid name prefix 'xml'\n",
4247 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
4249 return(name);
4250}
4251
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form: catalog = "URL" (single or double
 * quotes), with optional blanks around each part and nothing after the
 * closing quote. Syntax errors raise an XML_WAR_CATALOG_PI warning,
 * except a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign, no warning is emitted when it is missing */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;

    /* value, up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4313
Owen Taylor3473f882001-02-23 17:55:21 +00004314/**
4315 * xmlParsePI:
4316 * @ctxt: an XML parser context
4317 *
4318 * parse an XML Processing Instruction.
4319 *
4320 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4321 *
4322 * The processing is transfered to SAX once parsed.
4323 */
4324
4325void
4326xmlParsePI(xmlParserCtxtPtr ctxt) {
4327 xmlChar *buf = NULL;
4328 int len = 0;
4329 int size = XML_PARSER_BUFFER_SIZE;
4330 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004331 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 xmlParserInputState state;
4333 int count = 0;
4334
4335 if ((RAW == '<') && (NXT(1) == '?')) {
4336 xmlParserInputPtr input = ctxt->input;
4337 state = ctxt->instate;
4338 ctxt->instate = XML_PARSER_PI;
4339 /*
4340 * this is a Processing Instruction.
4341 */
4342 SKIP(2);
4343 SHRINK;
4344
4345 /*
4346 * Parse the target name and check for special support like
4347 * namespace.
4348 */
4349 target = xmlParsePITarget(ctxt);
4350 if (target != NULL) {
4351 if ((RAW == '?') && (NXT(1) == '>')) {
4352 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004353 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4354 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004355 }
4356 SKIP(2);
4357
4358 /*
4359 * SAX: PI detected.
4360 */
4361 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->processingInstruction != NULL))
4363 ctxt->sax->processingInstruction(ctxt->userData,
4364 target, NULL);
4365 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 return;
4367 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004368 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004369 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004370 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004371 ctxt->instate = state;
4372 return;
4373 }
4374 cur = CUR;
4375 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004376 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4377 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379 SKIP_BLANKS;
4380 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004381 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004382 ((cur != '?') || (NXT(1) != '>'))) {
4383 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004384 xmlChar *tmp;
4385
Owen Taylor3473f882001-02-23 17:55:21 +00004386 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004387 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4388 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004389 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004390 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004391 ctxt->instate = state;
4392 return;
4393 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004394 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004395 }
4396 count++;
4397 if (count > 50) {
4398 GROW;
4399 count = 0;
4400 }
4401 COPY_BUF(l,buf,len,cur);
4402 NEXTL(l);
4403 cur = CUR_CHAR(l);
4404 if (cur == 0) {
4405 SHRINK;
4406 GROW;
4407 cur = CUR_CHAR(l);
4408 }
4409 }
4410 buf[len] = 0;
4411 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004412 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4413 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004414 } else {
4415 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4417 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004418 }
4419 SKIP(2);
4420
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004421#ifdef LIBXML_CATALOG_ENABLED
4422 if (((state == XML_PARSER_MISC) ||
4423 (state == XML_PARSER_START)) &&
4424 (xmlStrEqual(target, XML_CATALOG_PI))) {
4425 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4426 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4427 (allow == XML_CATA_ALLOW_ALL))
4428 xmlParseCatalogPI(ctxt, buf);
4429 }
4430#endif
4431
4432
Owen Taylor3473f882001-02-23 17:55:21 +00004433 /*
4434 * SAX: PI detected.
4435 */
4436 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4437 (ctxt->sax->processingInstruction != NULL))
4438 ctxt->sax->processingInstruction(ctxt->userData,
4439 target, buf);
4440 }
4441 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004443 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 ctxt->instate = state;
4446 }
4447}
4448
4449/**
4450 * xmlParseNotationDecl:
4451 * @ctxt: an XML parser context
4452 *
4453 * parse a notation declaration
4454 *
4455 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4456 *
4457 * Hence there is actually 3 choices:
4458 * 'PUBLIC' S PubidLiteral
4459 * 'PUBLIC' S PubidLiteral S SystemLiteral
4460 * and 'SYSTEM' S SystemLiteral
4461 *
4462 * See the NOTE on xmlParseExternalID().
4463 */
4464
4465void
4466xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004467 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004468 xmlChar *Pubid;
4469 xmlChar *Systemid;
4470
Daniel Veillarda07050d2003-10-19 14:46:32 +00004471 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004472 xmlParserInputPtr input = ctxt->input;
4473 SHRINK;
4474 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004475 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004476 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4477 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004478 return;
4479 }
4480 SKIP_BLANKS;
4481
Daniel Veillard76d66f42001-05-16 21:05:17 +00004482 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004483 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004484 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004485 return;
4486 }
William M. Brack76e95df2003-10-18 16:20:14 +00004487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004489 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 return;
4491 }
4492 SKIP_BLANKS;
4493
4494 /*
4495 * Parse the IDs.
4496 */
4497 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4498 SKIP_BLANKS;
4499
4500 if (RAW == '>') {
4501 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004502 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4503 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004504 }
4505 NEXT;
4506 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4507 (ctxt->sax->notationDecl != NULL))
4508 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4509 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004510 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
Owen Taylor3473f882001-02-23 17:55:21 +00004512 if (Systemid != NULL) xmlFree(Systemid);
4513 if (Pubid != NULL) xmlFree(Pubid);
4514 }
4515}
4516
4517/**
4518 * xmlParseEntityDecl:
4519 * @ctxt: an XML parser context
4520 *
4521 * parse <!ENTITY declarations
4522 *
4523 * [70] EntityDecl ::= GEDecl | PEDecl
4524 *
4525 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4526 *
4527 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4528 *
4529 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4530 *
4531 * [74] PEDef ::= EntityValue | ExternalID
4532 *
4533 * [76] NDataDecl ::= S 'NDATA' S Name
4534 *
4535 * [ VC: Notation Declared ]
4536 * The Name must match the declared name of a notation.
4537 */
4538
4539void
4540xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004541 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004542 xmlChar *value = NULL;
4543 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004544 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004545 int isParameter = 0;
4546 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004547 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004548
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004550 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004551 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004552 SHRINK;
4553 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004554 skipped = SKIP_BLANKS;
4555 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004556 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4557 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004558 }
Owen Taylor3473f882001-02-23 17:55:21 +00004559
4560 if (RAW == '%') {
4561 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004562 skipped = SKIP_BLANKS;
4563 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004564 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4565 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004566 }
Owen Taylor3473f882001-02-23 17:55:21 +00004567 isParameter = 1;
4568 }
4569
Daniel Veillard76d66f42001-05-16 21:05:17 +00004570 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004571 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004572 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4573 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004574 return;
4575 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004576 skipped = SKIP_BLANKS;
4577 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4579 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004580 }
Owen Taylor3473f882001-02-23 17:55:21 +00004581
Daniel Veillardf5582f12002-06-11 10:08:16 +00004582 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004583 /*
4584 * handle the various case of definitions...
4585 */
4586 if (isParameter) {
4587 if ((RAW == '"') || (RAW == '\'')) {
4588 value = xmlParseEntityValue(ctxt, &orig);
4589 if (value) {
4590 if ((ctxt->sax != NULL) &&
4591 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4592 ctxt->sax->entityDecl(ctxt->userData, name,
4593 XML_INTERNAL_PARAMETER_ENTITY,
4594 NULL, NULL, value);
4595 }
4596 } else {
4597 URI = xmlParseExternalID(ctxt, &literal, 1);
4598 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004600 }
4601 if (URI) {
4602 xmlURIPtr uri;
4603
4604 uri = xmlParseURI((const char *) URI);
4605 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004606 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4607 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004608 /*
4609 * This really ought to be a well formedness error
4610 * but the XML Core WG decided otherwise c.f. issue
4611 * E26 of the XML erratas.
4612 */
Owen Taylor3473f882001-02-23 17:55:21 +00004613 } else {
4614 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004615 /*
4616 * Okay this is foolish to block those but not
4617 * invalid URIs.
4618 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004619 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004620 } else {
4621 if ((ctxt->sax != NULL) &&
4622 (!ctxt->disableSAX) &&
4623 (ctxt->sax->entityDecl != NULL))
4624 ctxt->sax->entityDecl(ctxt->userData, name,
4625 XML_EXTERNAL_PARAMETER_ENTITY,
4626 literal, URI, NULL);
4627 }
4628 xmlFreeURI(uri);
4629 }
4630 }
4631 }
4632 } else {
4633 if ((RAW == '"') || (RAW == '\'')) {
4634 value = xmlParseEntityValue(ctxt, &orig);
4635 if ((ctxt->sax != NULL) &&
4636 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4637 ctxt->sax->entityDecl(ctxt->userData, name,
4638 XML_INTERNAL_GENERAL_ENTITY,
4639 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004640 /*
4641 * For expat compatibility in SAX mode.
4642 */
4643 if ((ctxt->myDoc == NULL) ||
4644 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4645 if (ctxt->myDoc == NULL) {
4646 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4647 }
4648 if (ctxt->myDoc->intSubset == NULL)
4649 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4650 BAD_CAST "fake", NULL, NULL);
4651
Daniel Veillard1af9a412003-08-20 22:54:39 +00004652 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4653 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004654 }
Owen Taylor3473f882001-02-23 17:55:21 +00004655 } else {
4656 URI = xmlParseExternalID(ctxt, &literal, 1);
4657 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004658 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 }
4660 if (URI) {
4661 xmlURIPtr uri;
4662
4663 uri = xmlParseURI((const char *)URI);
4664 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004665 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4666 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004667 /*
4668 * This really ought to be a well formedness error
4669 * but the XML Core WG decided otherwise c.f. issue
4670 * E26 of the XML erratas.
4671 */
Owen Taylor3473f882001-02-23 17:55:21 +00004672 } else {
4673 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004674 /*
4675 * Okay this is foolish to block those but not
4676 * invalid URIs.
4677 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004678 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004679 }
4680 xmlFreeURI(uri);
4681 }
4682 }
William M. Brack76e95df2003-10-18 16:20:14 +00004683 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004684 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4685 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004686 }
4687 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004688 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004689 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004690 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4692 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004693 }
4694 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004695 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004696 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4697 (ctxt->sax->unparsedEntityDecl != NULL))
4698 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4699 literal, URI, ndata);
4700 } else {
4701 if ((ctxt->sax != NULL) &&
4702 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4703 ctxt->sax->entityDecl(ctxt->userData, name,
4704 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4705 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004706 /*
4707 * For expat compatibility in SAX mode.
4708 * assuming the entity replacement was asked for
4709 */
4710 if ((ctxt->replaceEntities != 0) &&
4711 ((ctxt->myDoc == NULL) ||
4712 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4713 if (ctxt->myDoc == NULL) {
4714 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4715 }
4716
4717 if (ctxt->myDoc->intSubset == NULL)
4718 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4719 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004720 xmlSAX2EntityDecl(ctxt, name,
4721 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4722 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004723 }
Owen Taylor3473f882001-02-23 17:55:21 +00004724 }
4725 }
4726 }
4727 SKIP_BLANKS;
4728 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004729 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004730 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004731 } else {
4732 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004733 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4734 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004735 }
4736 NEXT;
4737 }
4738 if (orig != NULL) {
4739 /*
4740 * Ugly mechanism to save the raw entity value.
4741 */
4742 xmlEntityPtr cur = NULL;
4743
4744 if (isParameter) {
4745 if ((ctxt->sax != NULL) &&
4746 (ctxt->sax->getParameterEntity != NULL))
4747 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4748 } else {
4749 if ((ctxt->sax != NULL) &&
4750 (ctxt->sax->getEntity != NULL))
4751 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004752 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004753 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004754 }
Owen Taylor3473f882001-02-23 17:55:21 +00004755 }
4756 if (cur != NULL) {
4757 if (cur->orig != NULL)
4758 xmlFree(orig);
4759 else
4760 cur->orig = orig;
4761 } else
4762 xmlFree(orig);
4763 }
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (value != NULL) xmlFree(value);
4765 if (URI != NULL) xmlFree(URI);
4766 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004767 }
4768}
4769
4770/**
4771 * xmlParseDefaultDecl:
4772 * @ctxt: an XML parser context
4773 * @value: Receive a possible fixed default value for the attribute
4774 *
4775 * Parse an attribute default declaration
4776 *
4777 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4778 *
4779 * [ VC: Required Attribute ]
4780 * if the default declaration is the keyword #REQUIRED, then the
4781 * attribute must be specified for all elements of the type in the
4782 * attribute-list declaration.
4783 *
4784 * [ VC: Attribute Default Legal ]
4785 * The declared default value must meet the lexical constraints of
4786 * the declared attribute type c.f. xmlValidateAttributeDecl()
4787 *
4788 * [ VC: Fixed Attribute Default ]
4789 * if an attribute has a default value declared with the #FIXED
4790 * keyword, instances of that attribute must match the default value.
4791 *
4792 * [ WFC: No < in Attribute Values ]
4793 * handled in xmlParseAttValue()
4794 *
4795 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4796 * or XML_ATTRIBUTE_FIXED.
4797 */
4798
4799int
4800xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4801 int val;
4802 xmlChar *ret;
4803
4804 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004805 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004806 SKIP(9);
4807 return(XML_ATTRIBUTE_REQUIRED);
4808 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004809 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004810 SKIP(8);
4811 return(XML_ATTRIBUTE_IMPLIED);
4812 }
4813 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004814 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004815 SKIP(6);
4816 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004817 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004818 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4819 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004820 }
4821 SKIP_BLANKS;
4822 }
4823 ret = xmlParseAttValue(ctxt);
4824 ctxt->instate = XML_PARSER_DTD;
4825 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004826 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004827 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 } else
4829 *value = ret;
4830 return(val);
4831}
4832
4833/**
4834 * xmlParseNotationType:
4835 * @ctxt: an XML parser context
4836 *
4837 * parse an Notation attribute type.
4838 *
4839 * Note: the leading 'NOTATION' S part has already being parsed...
4840 *
4841 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4842 *
4843 * [ VC: Notation Attributes ]
4844 * Values of this type must match one of the notation names included
4845 * in the declaration; all notation names in the declaration must be declared.
4846 *
4847 * Returns: the notation attribute tree built while parsing
4848 */
4849
4850xmlEnumerationPtr
4851xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004852 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004853 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4854
4855 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004856 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 return(NULL);
4858 }
4859 SHRINK;
4860 do {
4861 NEXT;
4862 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004863 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004864 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004865 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4866 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004867 return(ret);
4868 }
4869 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004870 if (cur == NULL) return(ret);
4871 if (last == NULL) ret = last = cur;
4872 else {
4873 last->next = cur;
4874 last = cur;
4875 }
4876 SKIP_BLANKS;
4877 } while (RAW == '|');
4878 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004879 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004880 if ((last != NULL) && (last != ret))
4881 xmlFreeEnumeration(last);
4882 return(ret);
4883 }
4884 NEXT;
4885 return(ret);
4886}
4887
4888/**
4889 * xmlParseEnumerationType:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse an Enumeration attribute type.
4893 *
4894 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4895 *
4896 * [ VC: Enumeration ]
4897 * Values of this type must match one of the Nmtoken tokens in
4898 * the declaration
4899 *
4900 * Returns: the enumeration attribute tree built while parsing
4901 */
4902
4903xmlEnumerationPtr
4904xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4905 xmlChar *name;
4906 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4907
4908 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004909 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004910 return(NULL);
4911 }
4912 SHRINK;
4913 do {
4914 NEXT;
4915 SKIP_BLANKS;
4916 name = xmlParseNmtoken(ctxt);
4917 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004918 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 return(ret);
4920 }
4921 cur = xmlCreateEnumeration(name);
4922 xmlFree(name);
4923 if (cur == NULL) return(ret);
4924 if (last == NULL) ret = last = cur;
4925 else {
4926 last->next = cur;
4927 last = cur;
4928 }
4929 SKIP_BLANKS;
4930 } while (RAW == '|');
4931 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004932 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004933 return(ret);
4934 }
4935 NEXT;
4936 return(ret);
4937}
4938
4939/**
4940 * xmlParseEnumeratedType:
4941 * @ctxt: an XML parser context
4942 * @tree: the enumeration tree built while parsing
4943 *
4944 * parse an Enumerated attribute type.
4945 *
4946 * [57] EnumeratedType ::= NotationType | Enumeration
4947 *
4948 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4949 *
4950 *
4951 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4952 */
4953
4954int
4955xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004956 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004957 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004958 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4960 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004961 return(0);
4962 }
4963 SKIP_BLANKS;
4964 *tree = xmlParseNotationType(ctxt);
4965 if (*tree == NULL) return(0);
4966 return(XML_ATTRIBUTE_NOTATION);
4967 }
4968 *tree = xmlParseEnumerationType(ctxt);
4969 if (*tree == NULL) return(0);
4970 return(XML_ATTRIBUTE_ENUMERATION);
4971}
4972
4973/**
4974 * xmlParseAttributeType:
4975 * @ctxt: an XML parser context
4976 * @tree: the enumeration tree built while parsing
4977 *
4978 * parse the Attribute list def for an element
4979 *
4980 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4981 *
4982 * [55] StringType ::= 'CDATA'
4983 *
4984 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4985 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4986 *
4987 * Validity constraints for attribute values syntax are checked in
4988 * xmlValidateAttributeValue()
4989 *
4990 * [ VC: ID ]
4991 * Values of type ID must match the Name production. A name must not
4992 * appear more than once in an XML document as a value of this type;
4993 * i.e., ID values must uniquely identify the elements which bear them.
4994 *
4995 * [ VC: One ID per Element Type ]
4996 * No element type may have more than one ID attribute specified.
4997 *
4998 * [ VC: ID Attribute Default ]
4999 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5000 *
5001 * [ VC: IDREF ]
5002 * Values of type IDREF must match the Name production, and values
5003 * of type IDREFS must match Names; each IDREF Name must match the value
5004 * of an ID attribute on some element in the XML document; i.e. IDREF
5005 * values must match the value of some ID attribute.
5006 *
5007 * [ VC: Entity Name ]
5008 * Values of type ENTITY must match the Name production, values
5009 * of type ENTITIES must match Names; each Entity Name must match the
5010 * name of an unparsed entity declared in the DTD.
5011 *
5012 * [ VC: Name Token ]
5013 * Values of type NMTOKEN must match the Nmtoken production; values
5014 * of type NMTOKENS must match Nmtokens.
5015 *
5016 * Returns the attribute type
5017 */
5018int
5019xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5020 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005021 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 SKIP(5);
5023 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005024 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005025 SKIP(6);
5026 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005027 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005028 SKIP(5);
5029 return(XML_ATTRIBUTE_IDREF);
5030 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5031 SKIP(2);
5032 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005033 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005034 SKIP(6);
5035 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005036 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005037 SKIP(8);
5038 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005039 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005040 SKIP(8);
5041 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005042 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005043 SKIP(7);
5044 return(XML_ATTRIBUTE_NMTOKEN);
5045 }
5046 return(xmlParseEnumeratedType(ctxt, tree));
5047}
5048
5049/**
5050 * xmlParseAttributeListDecl:
5051 * @ctxt: an XML parser context
5052 *
5053 * : parse the Attribute list def for an element
5054 *
5055 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5056 *
5057 * [53] AttDef ::= S Name S AttType S DefaultDecl
5058 *
5059 */
5060void
5061xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005062 const xmlChar *elemName;
5063 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 xmlEnumerationPtr tree;
5065
Daniel Veillarda07050d2003-10-19 14:46:32 +00005066 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005067 xmlParserInputPtr input = ctxt->input;
5068
5069 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005070 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005072 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005073 }
5074 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005075 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005077 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5078 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005079 return;
5080 }
5081 SKIP_BLANKS;
5082 GROW;
5083 while (RAW != '>') {
5084 const xmlChar *check = CUR_PTR;
5085 int type;
5086 int def;
5087 xmlChar *defaultValue = NULL;
5088
5089 GROW;
5090 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005091 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005092 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005093 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5094 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005095 break;
5096 }
5097 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005098 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005100 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005101 break;
5102 }
5103 SKIP_BLANKS;
5104
5105 type = xmlParseAttributeType(ctxt, &tree);
5106 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005107 break;
5108 }
5109
5110 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005111 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005112 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5113 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005114 if (tree != NULL)
5115 xmlFreeEnumeration(tree);
5116 break;
5117 }
5118 SKIP_BLANKS;
5119
5120 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5121 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if (defaultValue != NULL)
5123 xmlFree(defaultValue);
5124 if (tree != NULL)
5125 xmlFreeEnumeration(tree);
5126 break;
5127 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005128 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5129 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005130
5131 GROW;
5132 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005133 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005135 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005136 if (defaultValue != NULL)
5137 xmlFree(defaultValue);
5138 if (tree != NULL)
5139 xmlFreeEnumeration(tree);
5140 break;
5141 }
5142 SKIP_BLANKS;
5143 }
5144 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005145 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5146 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005147 if (defaultValue != NULL)
5148 xmlFree(defaultValue);
5149 if (tree != NULL)
5150 xmlFreeEnumeration(tree);
5151 break;
5152 }
5153 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5154 (ctxt->sax->attributeDecl != NULL))
5155 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5156 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005157 else if (tree != NULL)
5158 xmlFreeEnumeration(tree);
5159
5160 if ((ctxt->sax2) && (defaultValue != NULL) &&
5161 (def != XML_ATTRIBUTE_IMPLIED) &&
5162 (def != XML_ATTRIBUTE_REQUIRED)) {
5163 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5164 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005165 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005166 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5167 }
Owen Taylor3473f882001-02-23 17:55:21 +00005168 if (defaultValue != NULL)
5169 xmlFree(defaultValue);
5170 GROW;
5171 }
5172 if (RAW == '>') {
5173 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5175 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005176 }
5177 NEXT;
5178 }
Owen Taylor3473f882001-02-23 17:55:21 +00005179 }
5180}
5181
5182/**
5183 * xmlParseElementMixedContentDecl:
5184 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005185 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005186 *
5187 * parse the declaration for a Mixed Element content
5188 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5189 *
5190 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5191 * '(' S? '#PCDATA' S? ')'
5192 *
5193 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5194 *
5195 * [ VC: No Duplicate Types ]
5196 * The same name must not appear more than once in a single
5197 * mixed-content declaration.
5198 *
5199 * returns: the list of the xmlElementContentPtr describing the element choices
5200 */
5201xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005202xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005203 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005204 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005205
5206 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005207 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005208 SKIP(7);
5209 SKIP_BLANKS;
5210 SHRINK;
5211 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005212 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005213 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5214"Element content declaration doesn't start and stop in the same entity\n",
5215 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005216 }
Owen Taylor3473f882001-02-23 17:55:21 +00005217 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005218 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 if (RAW == '*') {
5220 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5221 NEXT;
5222 }
5223 return(ret);
5224 }
5225 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005226 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 if (ret == NULL) return(NULL);
5228 }
5229 while (RAW == '|') {
5230 NEXT;
5231 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005232 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005233 if (ret == NULL) return(NULL);
5234 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005235 if (cur != NULL)
5236 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005237 cur = ret;
5238 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005239 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005241 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005242 if (n->c1 != NULL)
5243 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005244 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005245 if (n != NULL)
5246 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005247 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005248 }
5249 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005250 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005251 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005252 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005253 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 return(NULL);
5256 }
5257 SKIP_BLANKS;
5258 GROW;
5259 }
5260 if ((RAW == ')') && (NXT(1) == '*')) {
5261 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005262 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005263 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005264 if (cur->c2 != NULL)
5265 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 }
5267 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005268 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005269 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5270"Element content declaration doesn't start and stop in the same entity\n",
5271 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273 SKIP(2);
5274 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005275 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005276 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005277 return(NULL);
5278 }
5279
5280 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005281 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
5283 return(ret);
5284}
5285
5286/**
5287 * xmlParseElementChildrenContentDecl:
5288 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005289 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005290 *
5291 * parse the declaration for an element's children content
5292 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5293 *
5294 *
5295 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5296 *
5297 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5298 *
5299 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5300 *
5301 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5302 *
5303 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5304 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005305 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005306 * opening or closing parentheses in a choice, seq, or Mixed
5307 * construct is contained in the replacement text for a parameter
5308 * entity, both must be contained in the same replacement text. For
5309 * interoperability, if a parameter-entity reference appears in a
5310 * choice, seq, or Mixed construct, its replacement text should not
5311 * be empty, and neither the first nor last non-blank character of
5312 * the replacement text should be a connector (| or ,).
5313 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005314 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005315 * hierarchy.
5316 */
5317xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005318xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005319 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005320 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 xmlChar type = 0;
5322
5323 SKIP_BLANKS;
5324 GROW;
5325 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005326 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005327
Owen Taylor3473f882001-02-23 17:55:21 +00005328 /* Recurse on first child */
5329 NEXT;
5330 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005331 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005332 SKIP_BLANKS;
5333 GROW;
5334 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005335 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005337 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005338 return(NULL);
5339 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005340 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005341 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005342 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005343 return(NULL);
5344 }
Owen Taylor3473f882001-02-23 17:55:21 +00005345 GROW;
5346 if (RAW == '?') {
5347 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5348 NEXT;
5349 } else if (RAW == '*') {
5350 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5351 NEXT;
5352 } else if (RAW == '+') {
5353 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5354 NEXT;
5355 } else {
5356 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5357 }
Owen Taylor3473f882001-02-23 17:55:21 +00005358 GROW;
5359 }
5360 SKIP_BLANKS;
5361 SHRINK;
5362 while (RAW != ')') {
5363 /*
5364 * Each loop we parse one separator and one element.
5365 */
5366 if (RAW == ',') {
5367 if (type == 0) type = CUR;
5368
5369 /*
5370 * Detect "Name | Name , Name" error
5371 */
5372 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005373 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005374 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005375 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005376 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005377 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005378 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005379 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 return(NULL);
5381 }
5382 NEXT;
5383
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005384 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005385 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005386 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005387 xmlFreeDocElementContent(ctxt->myDoc, last);
5388 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005389 return(NULL);
5390 }
5391 if (last == NULL) {
5392 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005393 if (ret != NULL)
5394 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005395 ret = cur = op;
5396 } else {
5397 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005398 if (op != NULL)
5399 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005401 if (last != NULL)
5402 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005403 cur =op;
5404 last = NULL;
5405 }
5406 } else if (RAW == '|') {
5407 if (type == 0) type = CUR;
5408
5409 /*
5410 * Detect "Name , Name | Name" error
5411 */
5412 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005413 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005414 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005415 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005416 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005417 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005419 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 return(NULL);
5421 }
5422 NEXT;
5423
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005424 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005425 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005426 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005427 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005429 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005430 return(NULL);
5431 }
5432 if (last == NULL) {
5433 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005434 if (ret != NULL)
5435 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 ret = cur = op;
5437 } else {
5438 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005439 if (op != NULL)
5440 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005441 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005442 if (last != NULL)
5443 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005444 cur =op;
5445 last = NULL;
5446 }
5447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005448 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005449 if ((last != NULL) && (last != ret))
5450 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005452 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 return(NULL);
5454 }
5455 GROW;
5456 SKIP_BLANKS;
5457 GROW;
5458 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005459 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005460 /* Recurse on second child */
5461 NEXT;
5462 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005463 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 SKIP_BLANKS;
5465 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005466 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005467 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005468 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005470 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005471 return(NULL);
5472 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005473 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005474 if (RAW == '?') {
5475 last->ocur = XML_ELEMENT_CONTENT_OPT;
5476 NEXT;
5477 } else if (RAW == '*') {
5478 last->ocur = XML_ELEMENT_CONTENT_MULT;
5479 NEXT;
5480 } else if (RAW == '+') {
5481 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5482 NEXT;
5483 } else {
5484 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5485 }
5486 }
5487 SKIP_BLANKS;
5488 GROW;
5489 }
5490 if ((cur != NULL) && (last != NULL)) {
5491 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005492 if (last != NULL)
5493 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005494 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005495 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005496 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5497"Element content declaration doesn't start and stop in the same entity\n",
5498 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005499 }
Owen Taylor3473f882001-02-23 17:55:21 +00005500 NEXT;
5501 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005502 if (ret != NULL) {
5503 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5504 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5505 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5506 else
5507 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5508 }
Owen Taylor3473f882001-02-23 17:55:21 +00005509 NEXT;
5510 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005511 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005512 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005513 cur = ret;
5514 /*
5515 * Some normalization:
5516 * (a | b* | c?)* == (a | b | c)*
5517 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005518 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005519 if ((cur->c1 != NULL) &&
5520 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5521 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5522 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5523 if ((cur->c2 != NULL) &&
5524 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5525 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5526 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5527 cur = cur->c2;
5528 }
5529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 NEXT;
5531 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005532 if (ret != NULL) {
5533 int found = 0;
5534
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005535 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5536 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5537 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005538 else
5539 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005540 /*
5541 * Some normalization:
5542 * (a | b*)+ == (a | b)*
5543 * (a | b?)+ == (a | b)*
5544 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005545 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005546 if ((cur->c1 != NULL) &&
5547 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5548 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5549 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5550 found = 1;
5551 }
5552 if ((cur->c2 != NULL) &&
5553 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5554 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5555 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5556 found = 1;
5557 }
5558 cur = cur->c2;
5559 }
5560 if (found)
5561 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5562 }
Owen Taylor3473f882001-02-23 17:55:21 +00005563 NEXT;
5564 }
5565 return(ret);
5566}
5567
5568/**
5569 * xmlParseElementContentDecl:
5570 * @ctxt: an XML parser context
5571 * @name: the name of the element being defined.
5572 * @result: the Element Content pointer will be stored here if any
5573 *
5574 * parse the declaration for an Element content either Mixed or Children,
5575 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5576 *
5577 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5578 *
5579 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5580 */
5581
5582int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005583xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005584 xmlElementContentPtr *result) {
5585
5586 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005588 int res;
5589
5590 *result = NULL;
5591
5592 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005593 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005594 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005595 return(-1);
5596 }
5597 NEXT;
5598 GROW;
5599 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005600 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005601 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005602 res = XML_ELEMENT_TYPE_MIXED;
5603 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005604 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005605 res = XML_ELEMENT_TYPE_ELEMENT;
5606 }
Owen Taylor3473f882001-02-23 17:55:21 +00005607 SKIP_BLANKS;
5608 *result = tree;
5609 return(res);
5610}
5611
5612/**
5613 * xmlParseElementDecl:
5614 * @ctxt: an XML parser context
5615 *
5616 * parse an Element declaration.
5617 *
5618 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5619 *
5620 * [ VC: Unique Element Type Declaration ]
5621 * No element type may be declared more than once
5622 *
5623 * Returns the type of the element, or -1 in case of error
5624 */
5625int
5626xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005627 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005628 int ret = -1;
5629 xmlElementContentPtr content = NULL;
5630
Daniel Veillard4c778d82005-01-23 17:37:44 +00005631 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005632 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005633 xmlParserInputPtr input = ctxt->input;
5634
5635 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005636 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5638 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005639 }
5640 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005641 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005642 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005643 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5644 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005645 return(-1);
5646 }
5647 while ((RAW == 0) && (ctxt->inputNr > 1))
5648 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005649 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005650 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5651 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005652 }
5653 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005654 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005655 SKIP(5);
5656 /*
5657 * Element must always be empty.
5658 */
5659 ret = XML_ELEMENT_TYPE_EMPTY;
5660 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5661 (NXT(2) == 'Y')) {
5662 SKIP(3);
5663 /*
5664 * Element is a generic container.
5665 */
5666 ret = XML_ELEMENT_TYPE_ANY;
5667 } else if (RAW == '(') {
5668 ret = xmlParseElementContentDecl(ctxt, name, &content);
5669 } else {
5670 /*
5671 * [ WFC: PEs in Internal Subset ] error handling.
5672 */
5673 if ((RAW == '%') && (ctxt->external == 0) &&
5674 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005675 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005676 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005677 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005678 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005679 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5680 }
Owen Taylor3473f882001-02-23 17:55:21 +00005681 return(-1);
5682 }
5683
5684 SKIP_BLANKS;
5685 /*
5686 * Pop-up of finished entities.
5687 */
5688 while ((RAW == 0) && (ctxt->inputNr > 1))
5689 xmlPopInput(ctxt);
5690 SKIP_BLANKS;
5691
5692 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005693 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005694 if (content != NULL) {
5695 xmlFreeDocElementContent(ctxt->myDoc, content);
5696 }
Owen Taylor3473f882001-02-23 17:55:21 +00005697 } else {
5698 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005699 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5700 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005701 }
5702
5703 NEXT;
5704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005705 (ctxt->sax->elementDecl != NULL)) {
5706 if (content != NULL)
5707 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005708 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5709 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005710 if ((content != NULL) && (content->parent == NULL)) {
5711 /*
5712 * this is a trick: if xmlAddElementDecl is called,
5713 * instead of copying the full tree it is plugged directly
5714 * if called from the parser. Avoid duplicating the
5715 * interfaces or change the API/ABI
5716 */
5717 xmlFreeDocElementContent(ctxt->myDoc, content);
5718 }
5719 } else if (content != NULL) {
5720 xmlFreeDocElementContent(ctxt->myDoc, content);
5721 }
Owen Taylor3473f882001-02-23 17:55:21 +00005722 }
Owen Taylor3473f882001-02-23 17:55:21 +00005723 }
5724 return(ret);
5725}
5726
5727/**
Owen Taylor3473f882001-02-23 17:55:21 +00005728 * xmlParseConditionalSections
5729 * @ctxt: an XML parser context
5730 *
5731 * [61] conditionalSect ::= includeSect | ignoreSect
5732 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5733 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5734 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5735 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5736 */
5737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005738static void
Owen Taylor3473f882001-02-23 17:55:21 +00005739xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5740 SKIP(3);
5741 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005742 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005743 SKIP(7);
5744 SKIP_BLANKS;
5745 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 } else {
5748 NEXT;
5749 }
5750 if (xmlParserDebugEntities) {
5751 if ((ctxt->input != NULL) && (ctxt->input->filename))
5752 xmlGenericError(xmlGenericErrorContext,
5753 "%s(%d): ", ctxt->input->filename,
5754 ctxt->input->line);
5755 xmlGenericError(xmlGenericErrorContext,
5756 "Entering INCLUDE Conditional Section\n");
5757 }
5758
5759 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5760 (NXT(2) != '>'))) {
5761 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005762 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005763
5764 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5765 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005766 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005767 NEXT;
5768 } else if (RAW == '%') {
5769 xmlParsePEReference(ctxt);
5770 } else
5771 xmlParseMarkupDecl(ctxt);
5772
5773 /*
5774 * Pop-up of finished entities.
5775 */
5776 while ((RAW == 0) && (ctxt->inputNr > 1))
5777 xmlPopInput(ctxt);
5778
Daniel Veillardfdc91562002-07-01 21:52:03 +00005779 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005780 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005781 break;
5782 }
5783 }
5784 if (xmlParserDebugEntities) {
5785 if ((ctxt->input != NULL) && (ctxt->input->filename))
5786 xmlGenericError(xmlGenericErrorContext,
5787 "%s(%d): ", ctxt->input->filename,
5788 ctxt->input->line);
5789 xmlGenericError(xmlGenericErrorContext,
5790 "Leaving INCLUDE Conditional Section\n");
5791 }
5792
Daniel Veillarda07050d2003-10-19 14:46:32 +00005793 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005794 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005795 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005796 int depth = 0;
5797
5798 SKIP(6);
5799 SKIP_BLANKS;
5800 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005801 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005802 } else {
5803 NEXT;
5804 }
5805 if (xmlParserDebugEntities) {
5806 if ((ctxt->input != NULL) && (ctxt->input->filename))
5807 xmlGenericError(xmlGenericErrorContext,
5808 "%s(%d): ", ctxt->input->filename,
5809 ctxt->input->line);
5810 xmlGenericError(xmlGenericErrorContext,
5811 "Entering IGNORE Conditional Section\n");
5812 }
5813
5814 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005815 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005816 * But disable SAX event generating DTD building in the meantime
5817 */
5818 state = ctxt->disableSAX;
5819 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 ctxt->instate = XML_PARSER_IGNORE;
5822
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005823 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005824 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5825 depth++;
5826 SKIP(3);
5827 continue;
5828 }
5829 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5830 if (--depth >= 0) SKIP(3);
5831 continue;
5832 }
5833 NEXT;
5834 continue;
5835 }
5836
5837 ctxt->disableSAX = state;
5838 ctxt->instate = instate;
5839
5840 if (xmlParserDebugEntities) {
5841 if ((ctxt->input != NULL) && (ctxt->input->filename))
5842 xmlGenericError(xmlGenericErrorContext,
5843 "%s(%d): ", ctxt->input->filename,
5844 ctxt->input->line);
5845 xmlGenericError(xmlGenericErrorContext,
5846 "Leaving IGNORE Conditional Section\n");
5847 }
5848
5849 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005850 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005851 }
5852
5853 if (RAW == 0)
5854 SHRINK;
5855
5856 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005857 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 } else {
5859 SKIP(3);
5860 }
5861}
5862
5863/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005864 * xmlParseMarkupDecl:
5865 * @ctxt: an XML parser context
5866 *
5867 * parse Markup declarations
5868 *
5869 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5870 * NotationDecl | PI | Comment
5871 *
5872 * [ VC: Proper Declaration/PE Nesting ]
5873 * Parameter-entity replacement text must be properly nested with
5874 * markup declarations. That is to say, if either the first character
5875 * or the last character of a markup declaration (markupdecl above) is
5876 * contained in the replacement text for a parameter-entity reference,
5877 * both must be contained in the same replacement text.
5878 *
5879 * [ WFC: PEs in Internal Subset ]
5880 * In the internal DTD subset, parameter-entity references can occur
5881 * only where markup declarations can occur, not within markup declarations.
5882 * (This does not apply to references that occur in external parameter
5883 * entities or to the external subset.)
5884 */
5885void
5886xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5887 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005888 if (CUR == '<') {
5889 if (NXT(1) == '!') {
5890 switch (NXT(2)) {
5891 case 'E':
5892 if (NXT(3) == 'L')
5893 xmlParseElementDecl(ctxt);
5894 else if (NXT(3) == 'N')
5895 xmlParseEntityDecl(ctxt);
5896 break;
5897 case 'A':
5898 xmlParseAttributeListDecl(ctxt);
5899 break;
5900 case 'N':
5901 xmlParseNotationDecl(ctxt);
5902 break;
5903 case '-':
5904 xmlParseComment(ctxt);
5905 break;
5906 default:
5907 /* there is an error but it will be detected later */
5908 break;
5909 }
5910 } else if (NXT(1) == '?') {
5911 xmlParsePI(ctxt);
5912 }
5913 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005914 /*
5915 * This is only for internal subset. On external entities,
5916 * the replacement is done before parsing stage
5917 */
5918 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5919 xmlParsePEReference(ctxt);
5920
5921 /*
5922 * Conditional sections are allowed from entities included
5923 * by PE References in the internal subset.
5924 */
5925 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5926 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5927 xmlParseConditionalSections(ctxt);
5928 }
5929 }
5930
5931 ctxt->instate = XML_PARSER_DTD;
5932}
5933
5934/**
5935 * xmlParseTextDecl:
5936 * @ctxt: an XML parser context
5937 *
5938 * parse an XML declaration header for external entities
5939 *
5940 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5941 *
5942 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5943 */
5944
5945void
5946xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5947 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005948 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005949
5950 /*
5951 * We know that '<?xml' is here.
5952 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005953 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005954 SKIP(5);
5955 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005956 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005957 return;
5958 }
5959
William M. Brack76e95df2003-10-18 16:20:14 +00005960 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005961 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5962 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005963 }
5964 SKIP_BLANKS;
5965
5966 /*
5967 * We may have the VersionInfo here.
5968 */
5969 version = xmlParseVersionInfo(ctxt);
5970 if (version == NULL)
5971 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005972 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005973 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5975 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005976 }
5977 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005978 ctxt->input->version = version;
5979
5980 /*
5981 * We must have the encoding declaration
5982 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005983 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005984 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5985 /*
5986 * The XML REC instructs us to stop parsing right here
5987 */
5988 return;
5989 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005990 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5991 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5992 "Missing encoding in text declaration\n");
5993 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005994
5995 SKIP_BLANKS;
5996 if ((RAW == '?') && (NXT(1) == '>')) {
5997 SKIP(2);
5998 } else if (RAW == '>') {
5999 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006000 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006001 NEXT;
6002 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006003 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006004 MOVETO_ENDTAG(CUR_PTR);
6005 NEXT;
6006 }
6007}
6008
6009/**
Owen Taylor3473f882001-02-23 17:55:21 +00006010 * xmlParseExternalSubset:
6011 * @ctxt: an XML parser context
6012 * @ExternalID: the external identifier
6013 * @SystemID: the system identifier (or URL)
6014 *
6015 * parse Markup declarations from an external subset
6016 *
6017 * [30] extSubset ::= textDecl? extSubsetDecl
6018 *
6019 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6020 */
6021void
6022xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6023 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006024 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006025 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006026 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006027 xmlParseTextDecl(ctxt);
6028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6029 /*
6030 * The XML REC instructs us to stop parsing right here
6031 */
6032 ctxt->instate = XML_PARSER_EOF;
6033 return;
6034 }
6035 }
6036 if (ctxt->myDoc == NULL) {
6037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6038 }
6039 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6040 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6041
6042 ctxt->instate = XML_PARSER_DTD;
6043 ctxt->external = 1;
6044 while (((RAW == '<') && (NXT(1) == '?')) ||
6045 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006046 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006047 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006048 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006049
6050 GROW;
6051 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6052 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006053 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006054 NEXT;
6055 } else if (RAW == '%') {
6056 xmlParsePEReference(ctxt);
6057 } else
6058 xmlParseMarkupDecl(ctxt);
6059
6060 /*
6061 * Pop-up of finished entities.
6062 */
6063 while ((RAW == 0) && (ctxt->inputNr > 1))
6064 xmlPopInput(ctxt);
6065
Daniel Veillardfdc91562002-07-01 21:52:03 +00006066 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006067 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 break;
6069 }
6070 }
6071
6072 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006073 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006074 }
6075
6076}
6077
6078/**
6079 * xmlParseReference:
6080 * @ctxt: an XML parser context
6081 *
6082 * parse and handle entity references in content, depending on the SAX
6083 * interface, this may end-up in a call to character() if this is a
6084 * CharRef, a predefined entity, if there is no reference() callback.
6085 * or if the parser was asked to switch to that mode.
6086 *
6087 * [67] Reference ::= EntityRef | CharRef
6088 */
6089void
6090xmlParseReference(xmlParserCtxtPtr ctxt) {
6091 xmlEntityPtr ent;
6092 xmlChar *val;
6093 if (RAW != '&') return;
6094
6095 if (NXT(1) == '#') {
6096 int i = 0;
6097 xmlChar out[10];
6098 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006099 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006100
Daniel Veillarddc171602008-03-26 17:41:38 +00006101 if (value == 0)
6102 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6104 /*
6105 * So we are using non-UTF-8 buffers
6106 * Check that the char fit on 8bits, if not
6107 * generate a CharRef.
6108 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006109 if (value <= 0xFF) {
6110 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006111 out[1] = 0;
6112 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6113 (!ctxt->disableSAX))
6114 ctxt->sax->characters(ctxt->userData, out, 1);
6115 } else {
6116 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006117 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006119 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006120 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6121 (!ctxt->disableSAX))
6122 ctxt->sax->reference(ctxt->userData, out);
6123 }
6124 } else {
6125 /*
6126 * Just encode the value in UTF-8
6127 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006128 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006129 out[i] = 0;
6130 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6131 (!ctxt->disableSAX))
6132 ctxt->sax->characters(ctxt->userData, out, i);
6133 }
6134 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006135 int was_checked;
6136
Owen Taylor3473f882001-02-23 17:55:21 +00006137 ent = xmlParseEntityRef(ctxt);
6138 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006139 if (!ctxt->wellFormed)
6140 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006141 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006142 if ((ent->name != NULL) &&
6143 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6144 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006145 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006146
6147
6148 /*
6149 * The first reference to the entity trigger a parsing phase
6150 * where the ent->children is filled with the result from
6151 * the parsing.
6152 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006153 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006154 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006155
Owen Taylor3473f882001-02-23 17:55:21 +00006156 value = ent->content;
6157
6158 /*
6159 * Check that this entity is well formed
6160 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006161 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006162 (value[1] == 0) && (value[0] == '<') &&
6163 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6164 /*
6165 * DONE: get definite answer on this !!!
6166 * Lots of entity decls are used to declare a single
6167 * char
6168 * <!ENTITY lt "<">
6169 * Which seems to be valid since
6170 * 2.4: The ampersand character (&) and the left angle
6171 * bracket (<) may appear in their literal form only
6172 * when used ... They are also legal within the literal
6173 * entity value of an internal entity declaration;i
6174 * see "4.3.2 Well-Formed Parsed Entities".
6175 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6176 * Looking at the OASIS test suite and James Clark
6177 * tests, this is broken. However the XML REC uses
6178 * it. Is the XML REC not well-formed ????
6179 * This is a hack to avoid this problem
6180 *
6181 * ANSWER: since lt gt amp .. are already defined,
6182 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006183 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006184 * is lousy but acceptable.
6185 */
6186 list = xmlNewDocText(ctxt->myDoc, value);
6187 if (list != NULL) {
6188 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6189 (ent->children == NULL)) {
6190 ent->children = list;
6191 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006192 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006193 list->parent = (xmlNodePtr) ent;
6194 } else {
6195 xmlFreeNodeList(list);
6196 }
6197 } else if (list != NULL) {
6198 xmlFreeNodeList(list);
6199 }
6200 } else {
6201 /*
6202 * 4.3.2: An internal general parsed entity is well-formed
6203 * if its replacement text matches the production labeled
6204 * content.
6205 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006206
6207 void *user_data;
6208 /*
6209 * This is a bit hackish but this seems the best
6210 * way to make sure both SAX and DOM entity support
6211 * behaves okay.
6212 */
6213 if (ctxt->userData == ctxt)
6214 user_data = NULL;
6215 else
6216 user_data = ctxt->userData;
6217
Owen Taylor3473f882001-02-23 17:55:21 +00006218 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6219 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006220 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6221 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006222 ctxt->depth--;
6223 } else if (ent->etype ==
6224 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6225 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006226 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006227 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006228 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006229 ctxt->depth--;
6230 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006231 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006232 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6233 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006234 }
6235 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006237 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006238 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006239 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6240 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006241 (ent->children == NULL)) {
6242 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006243 if (ctxt->replaceEntities) {
6244 /*
6245 * Prune it directly in the generated document
6246 * except for single text nodes.
6247 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006248 if (((list->type == XML_TEXT_NODE) &&
6249 (list->next == NULL)) ||
6250 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006251 list->parent = (xmlNodePtr) ent;
6252 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006253 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006254 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006255 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006256 while (list != NULL) {
6257 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006258 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006259 if (list->next == NULL)
6260 ent->last = list;
6261 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006262 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006263 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006264#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006265 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6266 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006267#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006268 }
6269 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006270 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006271 while (list != NULL) {
6272 list->parent = (xmlNodePtr) ent;
6273 if (list->next == NULL)
6274 ent->last = list;
6275 list = list->next;
6276 }
Owen Taylor3473f882001-02-23 17:55:21 +00006277 }
6278 } else {
6279 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006280 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006281 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006282 } else if ((ret != XML_ERR_OK) &&
6283 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006284 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6285 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 } else if (list != NULL) {
6287 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006288 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006289 }
6290 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006291 ent->checked = 1;
6292 }
6293
6294 if (ent->children == NULL) {
6295 /*
6296 * Probably running in SAX mode and the callbacks don't
6297 * build the entity content. So unless we already went
6298 * though parsing for first checking go though the entity
6299 * content to generate callbacks associated to the entity
6300 */
6301 if (was_checked == 1) {
6302 void *user_data;
6303 /*
6304 * This is a bit hackish but this seems the best
6305 * way to make sure both SAX and DOM entity support
6306 * behaves okay.
6307 */
6308 if (ctxt->userData == ctxt)
6309 user_data = NULL;
6310 else
6311 user_data = ctxt->userData;
6312
6313 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6314 ctxt->depth++;
6315 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6316 ent->content, user_data, NULL);
6317 ctxt->depth--;
6318 } else if (ent->etype ==
6319 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6320 ctxt->depth++;
6321 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6322 ctxt->sax, user_data, ctxt->depth,
6323 ent->URI, ent->ExternalID, NULL);
6324 ctxt->depth--;
6325 } else {
6326 ret = XML_ERR_ENTITY_PE_INTERNAL;
6327 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6328 "invalid entity type found\n", NULL);
6329 }
6330 if (ret == XML_ERR_ENTITY_LOOP) {
6331 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6332 return;
6333 }
6334 }
6335 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6336 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6337 /*
6338 * Entity reference callback comes second, it's somewhat
6339 * superfluous but a compatibility to historical behaviour
6340 */
6341 ctxt->sax->reference(ctxt->userData, ent->name);
6342 }
6343 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006344 }
6345 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006346 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006347 /*
6348 * Create a node.
6349 */
6350 ctxt->sax->reference(ctxt->userData, ent->name);
6351 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006352 }
6353 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006354 /*
6355 * There is a problem on the handling of _private for entities
6356 * (bug 155816): Should we copy the content of the field from
6357 * the entity (possibly overwriting some value set by the user
6358 * when a copy is created), should we leave it alone, or should
6359 * we try to take care of different situations? The problem
6360 * is exacerbated by the usage of this field by the xmlReader.
6361 * To fix this bug, we look at _private on the created node
6362 * and, if it's NULL, we copy in whatever was in the entity.
6363 * If it's not NULL we leave it alone. This is somewhat of a
6364 * hack - maybe we should have further tests to determine
6365 * what to do.
6366 */
Owen Taylor3473f882001-02-23 17:55:21 +00006367 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6368 /*
6369 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006370 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006371 * In the first occurrence list contains the replacement.
6372 * progressive == 2 means we are operating on the Reader
6373 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006374 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006375 if (((list == NULL) && (ent->owner == 0)) ||
6376 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006377 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006378
6379 /*
6380 * when operating on a reader, the entities definitions
6381 * are always owning the entities subtree.
6382 if (ctxt->parseMode == XML_PARSE_READER)
6383 ent->owner = 1;
6384 */
6385
Daniel Veillard62f313b2001-07-04 19:49:14 +00006386 cur = ent->children;
6387 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006388 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006389 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006390 if (nw->_private == NULL)
6391 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006392 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006393 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006394 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006395 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006396 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006397 if (cur == ent->last) {
6398 /*
6399 * needed to detect some strange empty
6400 * node cases in the reader tests
6401 */
6402 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006403 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006404 (nw->type == XML_ELEMENT_NODE) &&
6405 (nw->children == NULL))
6406 nw->extra = 1;
6407
Daniel Veillard62f313b2001-07-04 19:49:14 +00006408 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006409 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006410 cur = cur->next;
6411 }
Daniel Veillard81273902003-09-30 00:43:48 +00006412#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006413 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006414 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006415#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006416 } else if (list == NULL) {
6417 xmlNodePtr nw = NULL, cur, next, last,
6418 firstChild = NULL;
6419 /*
6420 * Copy the entity child list and make it the new
6421 * entity child list. The goal is to make sure any
6422 * ID or REF referenced will be the one from the
6423 * document content and not the entity copy.
6424 */
6425 cur = ent->children;
6426 ent->children = NULL;
6427 last = ent->last;
6428 ent->last = NULL;
6429 while (cur != NULL) {
6430 next = cur->next;
6431 cur->next = NULL;
6432 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006433 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006434 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006435 if (nw->_private == NULL)
6436 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006437 if (firstChild == NULL){
6438 firstChild = cur;
6439 }
6440 xmlAddChild((xmlNodePtr) ent, nw);
6441 xmlAddChild(ctxt->node, cur);
6442 }
6443 if (cur == last)
6444 break;
6445 cur = next;
6446 }
6447 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006448#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006449 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6450 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006451#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006452 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006453 const xmlChar *nbktext;
6454
Daniel Veillard62f313b2001-07-04 19:49:14 +00006455 /*
6456 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006457 * node with a possible previous text one which
6458 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006459 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006460 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6461 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006462 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006463 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006464 if ((ent->last != ent->children) &&
6465 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006466 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006467 xmlAddChildList(ctxt->node, ent->children);
6468 }
6469
Owen Taylor3473f882001-02-23 17:55:21 +00006470 /*
6471 * This is to avoid a nasty side effect, see
6472 * characters() in SAX.c
6473 */
6474 ctxt->nodemem = 0;
6475 ctxt->nodelen = 0;
6476 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006477 }
6478 }
6479 } else {
6480 val = ent->content;
6481 if (val == NULL) return;
6482 /*
6483 * inline the entity.
6484 */
6485 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6486 (!ctxt->disableSAX))
6487 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6488 }
6489 }
6490}
6491
6492/**
6493 * xmlParseEntityRef:
6494 * @ctxt: an XML parser context
6495 *
6496 * parse ENTITY references declarations
6497 *
6498 * [68] EntityRef ::= '&' Name ';'
6499 *
6500 * [ WFC: Entity Declared ]
6501 * In a document without any DTD, a document with only an internal DTD
6502 * subset which contains no parameter entity references, or a document
6503 * with "standalone='yes'", the Name given in the entity reference
6504 * must match that in an entity declaration, except that well-formed
6505 * documents need not declare any of the following entities: amp, lt,
6506 * gt, apos, quot. The declaration of a parameter entity must precede
6507 * any reference to it. Similarly, the declaration of a general entity
6508 * must precede any reference to it which appears in a default value in an
6509 * attribute-list declaration. Note that if entities are declared in the
6510 * external subset or in external parameter entities, a non-validating
6511 * processor is not obligated to read and process their declarations;
6512 * for such documents, the rule that an entity must be declared is a
6513 * well-formedness constraint only if standalone='yes'.
6514 *
6515 * [ WFC: Parsed Entity ]
6516 * An entity reference must not contain the name of an unparsed entity
6517 *
6518 * Returns the xmlEntityPtr if found, or NULL otherwise.
6519 */
6520xmlEntityPtr
6521xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006522 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006523 xmlEntityPtr ent = NULL;
6524
6525 GROW;
6526
6527 if (RAW == '&') {
6528 NEXT;
6529 name = xmlParseName(ctxt);
6530 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006531 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6532 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006533 } else {
6534 if (RAW == ';') {
6535 NEXT;
6536 /*
6537 * Ask first SAX for entity resolution, otherwise try the
6538 * predefined set.
6539 */
6540 if (ctxt->sax != NULL) {
6541 if (ctxt->sax->getEntity != NULL)
6542 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006543 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006544 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006545 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6546 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006547 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006548 }
Owen Taylor3473f882001-02-23 17:55:21 +00006549 }
6550 /*
6551 * [ WFC: Entity Declared ]
6552 * In a document without any DTD, a document with only an
6553 * internal DTD subset which contains no parameter entity
6554 * references, or a document with "standalone='yes'", the
6555 * Name given in the entity reference must match that in an
6556 * entity declaration, except that well-formed documents
6557 * need not declare any of the following entities: amp, lt,
6558 * gt, apos, quot.
6559 * The declaration of a parameter entity must precede any
6560 * reference to it.
6561 * Similarly, the declaration of a general entity must
6562 * precede any reference to it which appears in a default
6563 * value in an attribute-list declaration. Note that if
6564 * entities are declared in the external subset or in
6565 * external parameter entities, a non-validating processor
6566 * is not obligated to read and process their declarations;
6567 * for such documents, the rule that an entity must be
6568 * declared is a well-formedness constraint only if
6569 * standalone='yes'.
6570 */
6571 if (ent == NULL) {
6572 if ((ctxt->standalone == 1) ||
6573 ((ctxt->hasExternalSubset == 0) &&
6574 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006575 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006576 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006578 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006579 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006580 if ((ctxt->inSubset == 0) &&
6581 (ctxt->sax != NULL) &&
6582 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006583 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006584 }
Owen Taylor3473f882001-02-23 17:55:21 +00006585 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006586 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 }
6588
6589 /*
6590 * [ WFC: Parsed Entity ]
6591 * An entity reference must not contain the name of an
6592 * unparsed entity
6593 */
6594 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006595 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006596 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006597 }
6598
6599 /*
6600 * [ WFC: No External Entity References ]
6601 * Attribute values cannot contain direct or indirect
6602 * entity references to external entities.
6603 */
6604 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6605 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006606 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6607 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006608 }
6609 /*
6610 * [ WFC: No < in Attribute Values ]
6611 * The replacement text of any entity referred to directly or
6612 * indirectly in an attribute value (other than "&lt;") must
6613 * not contain a <.
6614 */
6615 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6616 (ent != NULL) &&
6617 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6618 (ent->content != NULL) &&
6619 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006620 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006621 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006622 }
6623
6624 /*
6625 * Internal check, no parameter entities here ...
6626 */
6627 else {
6628 switch (ent->etype) {
6629 case XML_INTERNAL_PARAMETER_ENTITY:
6630 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006631 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6632 "Attempt to reference the parameter entity '%s'\n",
6633 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006634 break;
6635 default:
6636 break;
6637 }
6638 }
6639
6640 /*
6641 * [ WFC: No Recursion ]
6642 * A parsed entity must not contain a recursive reference
6643 * to itself, either directly or indirectly.
6644 * Done somewhere else
6645 */
6646
6647 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006648 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006649 }
Owen Taylor3473f882001-02-23 17:55:21 +00006650 }
6651 }
6652 return(ent);
6653}
6654
6655/**
6656 * xmlParseStringEntityRef:
6657 * @ctxt: an XML parser context
6658 * @str: a pointer to an index in the string
6659 *
6660 * parse ENTITY references declarations, but this version parses it from
6661 * a string value.
6662 *
6663 * [68] EntityRef ::= '&' Name ';'
6664 *
6665 * [ WFC: Entity Declared ]
6666 * In a document without any DTD, a document with only an internal DTD
6667 * subset which contains no parameter entity references, or a document
6668 * with "standalone='yes'", the Name given in the entity reference
6669 * must match that in an entity declaration, except that well-formed
6670 * documents need not declare any of the following entities: amp, lt,
6671 * gt, apos, quot. The declaration of a parameter entity must precede
6672 * any reference to it. Similarly, the declaration of a general entity
6673 * must precede any reference to it which appears in a default value in an
6674 * attribute-list declaration. Note that if entities are declared in the
6675 * external subset or in external parameter entities, a non-validating
6676 * processor is not obligated to read and process their declarations;
6677 * for such documents, the rule that an entity must be declared is a
6678 * well-formedness constraint only if standalone='yes'.
6679 *
6680 * [ WFC: Parsed Entity ]
6681 * An entity reference must not contain the name of an unparsed entity
6682 *
6683 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6684 * is updated to the current location in the string.
6685 */
6686xmlEntityPtr
6687xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6688 xmlChar *name;
6689 const xmlChar *ptr;
6690 xmlChar cur;
6691 xmlEntityPtr ent = NULL;
6692
6693 if ((str == NULL) || (*str == NULL))
6694 return(NULL);
6695 ptr = *str;
6696 cur = *ptr;
6697 if (cur == '&') {
6698 ptr++;
6699 cur = *ptr;
6700 name = xmlParseStringName(ctxt, &ptr);
6701 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006702 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6703 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006704 } else {
6705 if (*ptr == ';') {
6706 ptr++;
6707 /*
6708 * Ask first SAX for entity resolution, otherwise try the
6709 * predefined set.
6710 */
6711 if (ctxt->sax != NULL) {
6712 if (ctxt->sax->getEntity != NULL)
6713 ent = ctxt->sax->getEntity(ctxt->userData, name);
6714 if (ent == NULL)
6715 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006716 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006717 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006718 }
Owen Taylor3473f882001-02-23 17:55:21 +00006719 }
6720 /*
6721 * [ WFC: Entity Declared ]
6722 * In a document without any DTD, a document with only an
6723 * internal DTD subset which contains no parameter entity
6724 * references, or a document with "standalone='yes'", the
6725 * Name given in the entity reference must match that in an
6726 * entity declaration, except that well-formed documents
6727 * need not declare any of the following entities: amp, lt,
6728 * gt, apos, quot.
6729 * The declaration of a parameter entity must precede any
6730 * reference to it.
6731 * Similarly, the declaration of a general entity must
6732 * precede any reference to it which appears in a default
6733 * value in an attribute-list declaration. Note that if
6734 * entities are declared in the external subset or in
6735 * external parameter entities, a non-validating processor
6736 * is not obligated to read and process their declarations;
6737 * for such documents, the rule that an entity must be
6738 * declared is a well-formedness constraint only if
6739 * standalone='yes'.
6740 */
6741 if (ent == NULL) {
6742 if ((ctxt->standalone == 1) ||
6743 ((ctxt->hasExternalSubset == 0) &&
6744 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006746 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006747 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006748 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006749 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006750 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006751 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006752 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006753 }
6754
6755 /*
6756 * [ WFC: Parsed Entity ]
6757 * An entity reference must not contain the name of an
6758 * unparsed entity
6759 */
6760 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006761 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006762 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006763 }
6764
6765 /*
6766 * [ WFC: No External Entity References ]
6767 * Attribute values cannot contain direct or indirect
6768 * entity references to external entities.
6769 */
6770 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6771 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006772 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006773 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006774 }
6775 /*
6776 * [ WFC: No < in Attribute Values ]
6777 * The replacement text of any entity referred to directly or
6778 * indirectly in an attribute value (other than "&lt;") must
6779 * not contain a <.
6780 */
6781 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6782 (ent != NULL) &&
6783 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6784 (ent->content != NULL) &&
6785 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006786 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6787 "'<' in entity '%s' is not allowed in attributes values\n",
6788 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006789 }
6790
6791 /*
6792 * Internal check, no parameter entities here ...
6793 */
6794 else {
6795 switch (ent->etype) {
6796 case XML_INTERNAL_PARAMETER_ENTITY:
6797 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006798 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6799 "Attempt to reference the parameter entity '%s'\n",
6800 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006801 break;
6802 default:
6803 break;
6804 }
6805 }
6806
6807 /*
6808 * [ WFC: No Recursion ]
6809 * A parsed entity must not contain a recursive reference
6810 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006811 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006812 */
6813
6814 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006815 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006816 }
6817 xmlFree(name);
6818 }
6819 }
6820 *str = ptr;
6821 return(ent);
6822}
6823
6824/**
6825 * xmlParsePEReference:
6826 * @ctxt: an XML parser context
6827 *
6828 * parse PEReference declarations
6829 * The entity content is handled directly by pushing it's content as
6830 * a new input stream.
6831 *
6832 * [69] PEReference ::= '%' Name ';'
6833 *
6834 * [ WFC: No Recursion ]
6835 * A parsed entity must not contain a recursive
6836 * reference to itself, either directly or indirectly.
6837 *
6838 * [ WFC: Entity Declared ]
6839 * In a document without any DTD, a document with only an internal DTD
6840 * subset which contains no parameter entity references, or a document
6841 * with "standalone='yes'", ... ... The declaration of a parameter
6842 * entity must precede any reference to it...
6843 *
6844 * [ VC: Entity Declared ]
6845 * In a document with an external subset or external parameter entities
6846 * with "standalone='no'", ... ... The declaration of a parameter entity
6847 * must precede any reference to it...
6848 *
6849 * [ WFC: In DTD ]
6850 * Parameter-entity references may only appear in the DTD.
6851 * NOTE: misleading but this is handled.
6852 */
6853void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006854xmlParsePEReference(xmlParserCtxtPtr ctxt)
6855{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006856 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006857 xmlEntityPtr entity = NULL;
6858 xmlParserInputPtr input;
6859
6860 if (RAW == '%') {
6861 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006862 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006863 if (name == NULL) {
6864 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6865 "xmlParsePEReference: no name\n");
6866 } else {
6867 if (RAW == ';') {
6868 NEXT;
6869 if ((ctxt->sax != NULL) &&
6870 (ctxt->sax->getParameterEntity != NULL))
6871 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6872 name);
6873 if (entity == NULL) {
6874 /*
6875 * [ WFC: Entity Declared ]
6876 * In a document without any DTD, a document with only an
6877 * internal DTD subset which contains no parameter entity
6878 * references, or a document with "standalone='yes'", ...
6879 * ... The declaration of a parameter entity must precede
6880 * any reference to it...
6881 */
6882 if ((ctxt->standalone == 1) ||
6883 ((ctxt->hasExternalSubset == 0) &&
6884 (ctxt->hasPErefs == 0))) {
6885 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6886 "PEReference: %%%s; not found\n",
6887 name);
6888 } else {
6889 /*
6890 * [ VC: Entity Declared ]
6891 * In a document with an external subset or external
6892 * parameter entities with "standalone='no'", ...
6893 * ... The declaration of a parameter entity must
6894 * precede any reference to it...
6895 */
6896 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6897 "PEReference: %%%s; not found\n",
6898 name, NULL);
6899 ctxt->valid = 0;
6900 }
6901 } else {
6902 /*
6903 * Internal checking in case the entity quest barfed
6904 */
6905 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6906 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6907 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6908 "Internal: %%%s; is not a parameter entity\n",
6909 name, NULL);
6910 } else if (ctxt->input->free != deallocblankswrapper) {
6911 input =
6912 xmlNewBlanksWrapperInputStream(ctxt, entity);
6913 xmlPushInput(ctxt, input);
6914 } else {
6915 /*
6916 * TODO !!!
6917 * handle the extra spaces added before and after
6918 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6919 */
6920 input = xmlNewEntityInputStream(ctxt, entity);
6921 xmlPushInput(ctxt, input);
6922 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006923 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006924 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006925 xmlParseTextDecl(ctxt);
6926 if (ctxt->errNo ==
6927 XML_ERR_UNSUPPORTED_ENCODING) {
6928 /*
6929 * The XML REC instructs us to stop parsing
6930 * right here
6931 */
6932 ctxt->instate = XML_PARSER_EOF;
6933 return;
6934 }
6935 }
6936 }
6937 }
6938 ctxt->hasPErefs = 1;
6939 } else {
6940 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6941 }
6942 }
Owen Taylor3473f882001-02-23 17:55:21 +00006943 }
6944}
6945
6946/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00006947 * xmlLoadEntityContent:
6948 * @ctxt: an XML parser context
6949 * @entity: an unloaded system entity
6950 *
6951 * Load the original content of the given system entity from the
6952 * ExternalID/SystemID given. This is to be used for Included in Literal
6953 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
6954 *
6955 * Returns 0 in case of success and -1 in case of failure
6956 */
6957static int
6958xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
6959 xmlParserInputPtr input;
6960 xmlBufferPtr buf;
6961 int l, c;
6962 int count = 0;
6963
6964 if ((ctxt == NULL) || (entity == NULL) ||
6965 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
6966 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
6967 (entity->content != NULL)) {
6968 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6969 "xmlLoadEntityContent parameter error");
6970 return(-1);
6971 }
6972
6973 if (xmlParserDebugEntities)
6974 xmlGenericError(xmlGenericErrorContext,
6975 "Reading %s entity content input\n", entity->name);
6976
6977 buf = xmlBufferCreate();
6978 if (buf == NULL) {
6979 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6980 "xmlLoadEntityContent parameter error");
6981 return(-1);
6982 }
6983
6984 input = xmlNewEntityInputStream(ctxt, entity);
6985 if (input == NULL) {
6986 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6987 "xmlLoadEntityContent input error");
6988 xmlBufferFree(buf);
6989 return(-1);
6990 }
6991
6992 /*
6993 * Push the entity as the current input, read char by char
6994 * saving to the buffer until the end of the entity or an error
6995 */
6996 xmlPushInput(ctxt, input);
6997 GROW;
6998 c = CUR_CHAR(l);
6999 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7000 (IS_CHAR(c))) {
7001 xmlBufferAdd(buf, ctxt->input->cur, l);
7002 if (count++ > 100) {
7003 count = 0;
7004 GROW;
7005 }
7006 NEXTL(l);
7007 c = CUR_CHAR(l);
7008 }
7009
7010 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7011 xmlPopInput(ctxt);
7012 } else if (!IS_CHAR(c)) {
7013 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7014 "xmlLoadEntityContent: invalid char value %d\n",
7015 c);
7016 xmlBufferFree(buf);
7017 return(-1);
7018 }
7019 entity->content = buf->content;
7020 buf->content = NULL;
7021 xmlBufferFree(buf);
7022
7023 return(0);
7024}
7025
7026/**
Owen Taylor3473f882001-02-23 17:55:21 +00007027 * xmlParseStringPEReference:
7028 * @ctxt: an XML parser context
7029 * @str: a pointer to an index in the string
7030 *
7031 * parse PEReference declarations
7032 *
7033 * [69] PEReference ::= '%' Name ';'
7034 *
7035 * [ WFC: No Recursion ]
7036 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007037 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007038 *
7039 * [ WFC: Entity Declared ]
7040 * In a document without any DTD, a document with only an internal DTD
7041 * subset which contains no parameter entity references, or a document
7042 * with "standalone='yes'", ... ... The declaration of a parameter
7043 * entity must precede any reference to it...
7044 *
7045 * [ VC: Entity Declared ]
7046 * In a document with an external subset or external parameter entities
7047 * with "standalone='no'", ... ... The declaration of a parameter entity
7048 * must precede any reference to it...
7049 *
7050 * [ WFC: In DTD ]
7051 * Parameter-entity references may only appear in the DTD.
7052 * NOTE: misleading but this is handled.
7053 *
7054 * Returns the string of the entity content.
7055 * str is updated to the current value of the index
7056 */
7057xmlEntityPtr
7058xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7059 const xmlChar *ptr;
7060 xmlChar cur;
7061 xmlChar *name;
7062 xmlEntityPtr entity = NULL;
7063
7064 if ((str == NULL) || (*str == NULL)) return(NULL);
7065 ptr = *str;
7066 cur = *ptr;
7067 if (cur == '%') {
7068 ptr++;
7069 cur = *ptr;
7070 name = xmlParseStringName(ctxt, &ptr);
7071 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007072 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7073 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007074 } else {
7075 cur = *ptr;
7076 if (cur == ';') {
7077 ptr++;
7078 cur = *ptr;
7079 if ((ctxt->sax != NULL) &&
7080 (ctxt->sax->getParameterEntity != NULL))
7081 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7082 name);
7083 if (entity == NULL) {
7084 /*
7085 * [ WFC: Entity Declared ]
7086 * In a document without any DTD, a document with only an
7087 * internal DTD subset which contains no parameter entity
7088 * references, or a document with "standalone='yes'", ...
7089 * ... The declaration of a parameter entity must precede
7090 * any reference to it...
7091 */
7092 if ((ctxt->standalone == 1) ||
7093 ((ctxt->hasExternalSubset == 0) &&
7094 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007095 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007096 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007097 } else {
7098 /*
7099 * [ VC: Entity Declared ]
7100 * In a document with an external subset or external
7101 * parameter entities with "standalone='no'", ...
7102 * ... The declaration of a parameter entity must
7103 * precede any reference to it...
7104 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007105 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7106 "PEReference: %%%s; not found\n",
7107 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007108 ctxt->valid = 0;
7109 }
7110 } else {
7111 /*
7112 * Internal checking in case the entity quest barfed
7113 */
7114 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7115 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007116 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7117 "%%%s; is not a parameter entity\n",
7118 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007119 }
7120 }
7121 ctxt->hasPErefs = 1;
7122 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007123 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007124 }
7125 xmlFree(name);
7126 }
7127 }
7128 *str = ptr;
7129 return(entity);
7130}
7131
7132/**
7133 * xmlParseDocTypeDecl:
7134 * @ctxt: an XML parser context
7135 *
7136 * parse a DOCTYPE declaration
7137 *
7138 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7139 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7140 *
7141 * [ VC: Root Element Type ]
7142 * The Name in the document type declaration must match the element
7143 * type of the root element.
7144 */
7145
7146void
7147xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007148 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007149 xmlChar *ExternalID = NULL;
7150 xmlChar *URI = NULL;
7151
7152 /*
7153 * We know that '<!DOCTYPE' has been detected.
7154 */
7155 SKIP(9);
7156
7157 SKIP_BLANKS;
7158
7159 /*
7160 * Parse the DOCTYPE name.
7161 */
7162 name = xmlParseName(ctxt);
7163 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7165 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007166 }
7167 ctxt->intSubName = name;
7168
7169 SKIP_BLANKS;
7170
7171 /*
7172 * Check for SystemID and ExternalID
7173 */
7174 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7175
7176 if ((URI != NULL) || (ExternalID != NULL)) {
7177 ctxt->hasExternalSubset = 1;
7178 }
7179 ctxt->extSubURI = URI;
7180 ctxt->extSubSystem = ExternalID;
7181
7182 SKIP_BLANKS;
7183
7184 /*
7185 * Create and update the internal subset.
7186 */
7187 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7188 (!ctxt->disableSAX))
7189 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7190
7191 /*
7192 * Is there any internal subset declarations ?
7193 * they are handled separately in xmlParseInternalSubset()
7194 */
7195 if (RAW == '[')
7196 return;
7197
7198 /*
7199 * We should be at the end of the DOCTYPE declaration.
7200 */
7201 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007202 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007203 }
7204 NEXT;
7205}
7206
7207/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007208 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007209 * @ctxt: an XML parser context
7210 *
7211 * parse the internal subset declaration
7212 *
7213 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7214 */
7215
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007216static void
Owen Taylor3473f882001-02-23 17:55:21 +00007217xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7218 /*
7219 * Is there any DTD definition ?
7220 */
7221 if (RAW == '[') {
7222 ctxt->instate = XML_PARSER_DTD;
7223 NEXT;
7224 /*
7225 * Parse the succession of Markup declarations and
7226 * PEReferences.
7227 * Subsequence (markupdecl | PEReference | S)*
7228 */
7229 while (RAW != ']') {
7230 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007231 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007232
7233 SKIP_BLANKS;
7234 xmlParseMarkupDecl(ctxt);
7235 xmlParsePEReference(ctxt);
7236
7237 /*
7238 * Pop-up of finished entities.
7239 */
7240 while ((RAW == 0) && (ctxt->inputNr > 1))
7241 xmlPopInput(ctxt);
7242
7243 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007244 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007245 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007246 break;
7247 }
7248 }
7249 if (RAW == ']') {
7250 NEXT;
7251 SKIP_BLANKS;
7252 }
7253 }
7254
7255 /*
7256 * We should be at the end of the DOCTYPE declaration.
7257 */
7258 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007259 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007260 }
7261 NEXT;
7262}
7263
Daniel Veillard81273902003-09-30 00:43:48 +00007264#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007265/**
7266 * xmlParseAttribute:
7267 * @ctxt: an XML parser context
7268 * @value: a xmlChar ** used to store the value of the attribute
7269 *
7270 * parse an attribute
7271 *
7272 * [41] Attribute ::= Name Eq AttValue
7273 *
7274 * [ WFC: No External Entity References ]
7275 * Attribute values cannot contain direct or indirect entity references
7276 * to external entities.
7277 *
7278 * [ WFC: No < in Attribute Values ]
7279 * The replacement text of any entity referred to directly or indirectly in
7280 * an attribute value (other than "&lt;") must not contain a <.
7281 *
7282 * [ VC: Attribute Value Type ]
7283 * The attribute must have been declared; the value must be of the type
7284 * declared for it.
7285 *
7286 * [25] Eq ::= S? '=' S?
7287 *
7288 * With namespace:
7289 *
7290 * [NS 11] Attribute ::= QName Eq AttValue
7291 *
7292 * Also the case QName == xmlns:??? is handled independently as a namespace
7293 * definition.
7294 *
7295 * Returns the attribute name, and the value in *value.
7296 */
7297
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007298const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007299xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007300 const xmlChar *name;
7301 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007302
7303 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007304 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007305 name = xmlParseName(ctxt);
7306 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007307 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007308 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007309 return(NULL);
7310 }
7311
7312 /*
7313 * read the value
7314 */
7315 SKIP_BLANKS;
7316 if (RAW == '=') {
7317 NEXT;
7318 SKIP_BLANKS;
7319 val = xmlParseAttValue(ctxt);
7320 ctxt->instate = XML_PARSER_CONTENT;
7321 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007322 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007323 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007324 return(NULL);
7325 }
7326
7327 /*
7328 * Check that xml:lang conforms to the specification
7329 * No more registered as an error, just generate a warning now
7330 * since this was deprecated in XML second edition
7331 */
7332 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7333 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007334 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7335 "Malformed value for xml:lang : %s\n",
7336 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007337 }
7338 }
7339
7340 /*
7341 * Check that xml:space conforms to the specification
7342 */
7343 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7344 if (xmlStrEqual(val, BAD_CAST "default"))
7345 *(ctxt->space) = 0;
7346 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7347 *(ctxt->space) = 1;
7348 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007349 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007350"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007351 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007352 }
7353 }
7354
7355 *value = val;
7356 return(name);
7357}
7358
7359/**
7360 * xmlParseStartTag:
7361 * @ctxt: an XML parser context
7362 *
7363 * parse a start of tag either for rule element or
7364 * EmptyElement. In both case we don't parse the tag closing chars.
7365 *
7366 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7367 *
7368 * [ WFC: Unique Att Spec ]
7369 * No attribute name may appear more than once in the same start-tag or
7370 * empty-element tag.
7371 *
7372 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7373 *
7374 * [ WFC: Unique Att Spec ]
7375 * No attribute name may appear more than once in the same start-tag or
7376 * empty-element tag.
7377 *
7378 * With namespace:
7379 *
7380 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7381 *
7382 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7383 *
7384 * Returns the element name parsed
7385 */
7386
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007387const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007388xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007389 const xmlChar *name;
7390 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007391 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007392 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007393 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007394 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007395 int i;
7396
7397 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007398 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007399
7400 name = xmlParseName(ctxt);
7401 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007402 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007403 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007404 return(NULL);
7405 }
7406
7407 /*
7408 * Now parse the attributes, it ends up with the ending
7409 *
7410 * (S Attribute)* S?
7411 */
7412 SKIP_BLANKS;
7413 GROW;
7414
Daniel Veillard21a0f912001-02-25 19:54:14 +00007415 while ((RAW != '>') &&
7416 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007417 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007418 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007419 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007420
7421 attname = xmlParseAttribute(ctxt, &attvalue);
7422 if ((attname != NULL) && (attvalue != NULL)) {
7423 /*
7424 * [ WFC: Unique Att Spec ]
7425 * No attribute name may appear more than once in the same
7426 * start-tag or empty-element tag.
7427 */
7428 for (i = 0; i < nbatts;i += 2) {
7429 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007430 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007431 xmlFree(attvalue);
7432 goto failed;
7433 }
7434 }
Owen Taylor3473f882001-02-23 17:55:21 +00007435 /*
7436 * Add the pair to atts
7437 */
7438 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007439 maxatts = 22; /* allow for 10 attrs by default */
7440 atts = (const xmlChar **)
7441 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007442 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007443 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007444 if (attvalue != NULL)
7445 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007446 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007447 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007448 ctxt->atts = atts;
7449 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007450 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007451 const xmlChar **n;
7452
Owen Taylor3473f882001-02-23 17:55:21 +00007453 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007454 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007455 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007456 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007457 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007458 if (attvalue != NULL)
7459 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007460 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007461 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007462 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007463 ctxt->atts = atts;
7464 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007465 }
7466 atts[nbatts++] = attname;
7467 atts[nbatts++] = attvalue;
7468 atts[nbatts] = NULL;
7469 atts[nbatts + 1] = NULL;
7470 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007471 if (attvalue != NULL)
7472 xmlFree(attvalue);
7473 }
7474
7475failed:
7476
Daniel Veillard3772de32002-12-17 10:31:45 +00007477 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007478 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7479 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007480 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007481 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7482 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007483 }
7484 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007485 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7486 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007487 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7488 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007489 break;
7490 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007491 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007492 GROW;
7493 }
7494
7495 /*
7496 * SAX: Start of Element !
7497 */
7498 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007499 (!ctxt->disableSAX)) {
7500 if (nbatts > 0)
7501 ctxt->sax->startElement(ctxt->userData, name, atts);
7502 else
7503 ctxt->sax->startElement(ctxt->userData, name, NULL);
7504 }
Owen Taylor3473f882001-02-23 17:55:21 +00007505
7506 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007507 /* Free only the content strings */
7508 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007509 if (atts[i] != NULL)
7510 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007511 }
7512 return(name);
7513}
7514
7515/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007517 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 * @line: line of the start tag
7519 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007520 *
7521 * parse an end of tag
7522 *
7523 * [42] ETag ::= '</' Name S? '>'
7524 *
7525 * With namespace
7526 *
7527 * [NS 9] ETag ::= '</' QName S? '>'
7528 */
7529
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007530static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007531xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007532 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007533
7534 GROW;
7535 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007536 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007537 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007538 return;
7539 }
7540 SKIP(2);
7541
Daniel Veillard46de64e2002-05-29 08:21:33 +00007542 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007543
7544 /*
7545 * We should definitely be at the ending "S? '>'" part
7546 */
7547 GROW;
7548 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007549 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007550 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007551 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007552 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007553
7554 /*
7555 * [ WFC: Element Type Match ]
7556 * The Name in an element's end-tag must match the element type in the
7557 * start-tag.
7558 *
7559 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007560 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007561 if (name == NULL) name = BAD_CAST "unparseable";
7562 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007563 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007564 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007565 }
7566
7567 /*
7568 * SAX: End of Tag
7569 */
7570 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7571 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007572 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007573
Daniel Veillarde57ec792003-09-10 10:50:59 +00007574 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007575 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007576 return;
7577}
7578
7579/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007580 * xmlParseEndTag:
7581 * @ctxt: an XML parser context
7582 *
7583 * parse an end of tag
7584 *
7585 * [42] ETag ::= '</' Name S? '>'
7586 *
7587 * With namespace
7588 *
7589 * [NS 9] ETag ::= '</' QName S? '>'
7590 */
7591
7592void
7593xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007594 xmlParseEndTag1(ctxt, 0);
7595}
Daniel Veillard81273902003-09-30 00:43:48 +00007596#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007597
7598/************************************************************************
7599 * *
7600 * SAX 2 specific operations *
7601 * *
7602 ************************************************************************/
7603
7604static const xmlChar *
7605xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7606 int len = 0, l;
7607 int c;
7608 int count = 0;
7609
7610 /*
7611 * Handler for more complex cases
7612 */
7613 GROW;
7614 c = CUR_CHAR(l);
7615 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007616 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007617 return(NULL);
7618 }
7619
7620 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007621 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007622 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007623 (IS_COMBINING(c)) ||
7624 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007625 if (count++ > 100) {
7626 count = 0;
7627 GROW;
7628 }
7629 len += l;
7630 NEXTL(l);
7631 c = CUR_CHAR(l);
7632 }
7633 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7634}
7635
7636/*
7637 * xmlGetNamespace:
7638 * @ctxt: an XML parser context
7639 * @prefix: the prefix to lookup
7640 *
7641 * Lookup the namespace name for the @prefix (which ca be NULL)
7642 * The prefix must come from the @ctxt->dict dictionnary
7643 *
7644 * Returns the namespace name or NULL if not bound
7645 */
7646static const xmlChar *
7647xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7648 int i;
7649
Daniel Veillarde57ec792003-09-10 10:50:59 +00007650 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007651 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007652 if (ctxt->nsTab[i] == prefix) {
7653 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7654 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007655 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007656 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007657 return(NULL);
7658}
7659
7660/**
7661 * xmlParseNCName:
7662 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007663 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664 *
7665 * parse an XML name.
7666 *
7667 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7668 * CombiningChar | Extender
7669 *
7670 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7671 *
7672 * Returns the Name parsed or NULL
7673 */
7674
7675static const xmlChar *
7676xmlParseNCName(xmlParserCtxtPtr ctxt) {
7677 const xmlChar *in;
7678 const xmlChar *ret;
7679 int count = 0;
7680
7681 /*
7682 * Accelerator for simple ASCII names
7683 */
7684 in = ctxt->input->cur;
7685 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7686 ((*in >= 0x41) && (*in <= 0x5A)) ||
7687 (*in == '_')) {
7688 in++;
7689 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7690 ((*in >= 0x41) && (*in <= 0x5A)) ||
7691 ((*in >= 0x30) && (*in <= 0x39)) ||
7692 (*in == '_') || (*in == '-') ||
7693 (*in == '.'))
7694 in++;
7695 if ((*in > 0) && (*in < 0x80)) {
7696 count = in - ctxt->input->cur;
7697 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7698 ctxt->input->cur = in;
7699 ctxt->nbChars += count;
7700 ctxt->input->col += count;
7701 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007702 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007703 }
7704 return(ret);
7705 }
7706 }
7707 return(xmlParseNCNameComplex(ctxt));
7708}
7709
7710/**
7711 * xmlParseQName:
7712 * @ctxt: an XML parser context
7713 * @prefix: pointer to store the prefix part
7714 *
7715 * parse an XML Namespace QName
7716 *
7717 * [6] QName ::= (Prefix ':')? LocalPart
7718 * [7] Prefix ::= NCName
7719 * [8] LocalPart ::= NCName
7720 *
7721 * Returns the Name parsed or NULL
7722 */
7723
7724static const xmlChar *
7725xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7726 const xmlChar *l, *p;
7727
7728 GROW;
7729
7730 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007731 if (l == NULL) {
7732 if (CUR == ':') {
7733 l = xmlParseName(ctxt);
7734 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007735 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7736 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007737 *prefix = NULL;
7738 return(l);
7739 }
7740 }
7741 return(NULL);
7742 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007743 if (CUR == ':') {
7744 NEXT;
7745 p = l;
7746 l = xmlParseNCName(ctxt);
7747 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007748 xmlChar *tmp;
7749
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007750 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7751 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007752 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7753 p = xmlDictLookup(ctxt->dict, tmp, -1);
7754 if (tmp != NULL) xmlFree(tmp);
7755 *prefix = NULL;
7756 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757 }
7758 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007759 xmlChar *tmp;
7760
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007761 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7762 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007763 NEXT;
7764 tmp = (xmlChar *) xmlParseName(ctxt);
7765 if (tmp != NULL) {
7766 tmp = xmlBuildQName(tmp, l, NULL, 0);
7767 l = xmlDictLookup(ctxt->dict, tmp, -1);
7768 if (tmp != NULL) xmlFree(tmp);
7769 *prefix = p;
7770 return(l);
7771 }
7772 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7773 l = xmlDictLookup(ctxt->dict, tmp, -1);
7774 if (tmp != NULL) xmlFree(tmp);
7775 *prefix = p;
7776 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007777 }
7778 *prefix = p;
7779 } else
7780 *prefix = NULL;
7781 return(l);
7782}
7783
7784/**
7785 * xmlParseQNameAndCompare:
7786 * @ctxt: an XML parser context
7787 * @name: the localname
7788 * @prefix: the prefix, if any.
7789 *
7790 * parse an XML name and compares for match
7791 * (specialized for endtag parsing)
7792 *
7793 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7794 * and the name for mismatch
7795 */
7796
7797static const xmlChar *
7798xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7799 xmlChar const *prefix) {
7800 const xmlChar *cmp = name;
7801 const xmlChar *in;
7802 const xmlChar *ret;
7803 const xmlChar *prefix2;
7804
7805 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7806
7807 GROW;
7808 in = ctxt->input->cur;
7809
7810 cmp = prefix;
7811 while (*in != 0 && *in == *cmp) {
7812 ++in;
7813 ++cmp;
7814 }
7815 if ((*cmp == 0) && (*in == ':')) {
7816 in++;
7817 cmp = name;
7818 while (*in != 0 && *in == *cmp) {
7819 ++in;
7820 ++cmp;
7821 }
William M. Brack76e95df2003-10-18 16:20:14 +00007822 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007823 /* success */
7824 ctxt->input->cur = in;
7825 return((const xmlChar*) 1);
7826 }
7827 }
7828 /*
7829 * all strings coms from the dictionary, equality can be done directly
7830 */
7831 ret = xmlParseQName (ctxt, &prefix2);
7832 if ((ret == name) && (prefix == prefix2))
7833 return((const xmlChar*) 1);
7834 return ret;
7835}
7836
7837/**
7838 * xmlParseAttValueInternal:
7839 * @ctxt: an XML parser context
7840 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007841 * @alloc: whether the attribute was reallocated as a new string
7842 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007843 *
7844 * parse a value for an attribute.
7845 * NOTE: if no normalization is needed, the routine will return pointers
7846 * directly from the data buffer.
7847 *
7848 * 3.3.3 Attribute-Value Normalization:
7849 * Before the value of an attribute is passed to the application or
7850 * checked for validity, the XML processor must normalize it as follows:
7851 * - a character reference is processed by appending the referenced
7852 * character to the attribute value
7853 * - an entity reference is processed by recursively processing the
7854 * replacement text of the entity
7855 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7856 * appending #x20 to the normalized value, except that only a single
7857 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7858 * parsed entity or the literal entity value of an internal parsed entity
7859 * - other characters are processed by appending them to the normalized value
7860 * If the declared value is not CDATA, then the XML processor must further
7861 * process the normalized attribute value by discarding any leading and
7862 * trailing space (#x20) characters, and by replacing sequences of space
7863 * (#x20) characters by a single space (#x20) character.
7864 * All attributes for which no declaration has been read should be treated
7865 * by a non-validating parser as if declared CDATA.
7866 *
7867 * Returns the AttValue parsed or NULL. The value has to be freed by the
7868 * caller if it was copied, this can be detected by val[*len] == 0.
7869 */
7870
7871static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007872xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7873 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007874{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007875 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007876 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007877 xmlChar *ret = NULL;
7878
7879 GROW;
7880 in = (xmlChar *) CUR_PTR;
7881 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007882 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883 return (NULL);
7884 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007885 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007886
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007887 /*
7888 * try to handle in this routine the most common case where no
7889 * allocation of a new string is required and where content is
7890 * pure ASCII.
7891 */
7892 limit = *in++;
7893 end = ctxt->input->end;
7894 start = in;
7895 if (in >= end) {
7896 const xmlChar *oldbase = ctxt->input->base;
7897 GROW;
7898 if (oldbase != ctxt->input->base) {
7899 long delta = ctxt->input->base - oldbase;
7900 start = start + delta;
7901 in = in + delta;
7902 }
7903 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007904 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007905 if (normalize) {
7906 /*
7907 * Skip any leading spaces
7908 */
7909 while ((in < end) && (*in != limit) &&
7910 ((*in == 0x20) || (*in == 0x9) ||
7911 (*in == 0xA) || (*in == 0xD))) {
7912 in++;
7913 start = in;
7914 if (in >= end) {
7915 const xmlChar *oldbase = ctxt->input->base;
7916 GROW;
7917 if (oldbase != ctxt->input->base) {
7918 long delta = ctxt->input->base - oldbase;
7919 start = start + delta;
7920 in = in + delta;
7921 }
7922 end = ctxt->input->end;
7923 }
7924 }
7925 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7926 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7927 if ((*in++ == 0x20) && (*in == 0x20)) break;
7928 if (in >= end) {
7929 const xmlChar *oldbase = ctxt->input->base;
7930 GROW;
7931 if (oldbase != ctxt->input->base) {
7932 long delta = ctxt->input->base - oldbase;
7933 start = start + delta;
7934 in = in + delta;
7935 }
7936 end = ctxt->input->end;
7937 }
7938 }
7939 last = in;
7940 /*
7941 * skip the trailing blanks
7942 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007943 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007944 while ((in < end) && (*in != limit) &&
7945 ((*in == 0x20) || (*in == 0x9) ||
7946 (*in == 0xA) || (*in == 0xD))) {
7947 in++;
7948 if (in >= end) {
7949 const xmlChar *oldbase = ctxt->input->base;
7950 GROW;
7951 if (oldbase != ctxt->input->base) {
7952 long delta = ctxt->input->base - oldbase;
7953 start = start + delta;
7954 in = in + delta;
7955 last = last + delta;
7956 }
7957 end = ctxt->input->end;
7958 }
7959 }
7960 if (*in != limit) goto need_complex;
7961 } else {
7962 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7963 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7964 in++;
7965 if (in >= end) {
7966 const xmlChar *oldbase = ctxt->input->base;
7967 GROW;
7968 if (oldbase != ctxt->input->base) {
7969 long delta = ctxt->input->base - oldbase;
7970 start = start + delta;
7971 in = in + delta;
7972 }
7973 end = ctxt->input->end;
7974 }
7975 }
7976 last = in;
7977 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007978 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007979 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007980 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007981 *len = last - start;
7982 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007984 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007985 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007986 }
7987 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007988 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007989 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007990need_complex:
7991 if (alloc) *alloc = 1;
7992 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007993}
7994
7995/**
7996 * xmlParseAttribute2:
7997 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007998 * @pref: the element prefix
7999 * @elem: the element name
8000 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008001 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008002 * @len: an int * to save the length of the attribute
8003 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008004 *
8005 * parse an attribute in the new SAX2 framework.
8006 *
8007 * Returns the attribute name, and the value in *value, .
8008 */
8009
8010static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008011xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008012 const xmlChar * pref, const xmlChar * elem,
8013 const xmlChar ** prefix, xmlChar ** value,
8014 int *len, int *alloc)
8015{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008017 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008018 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008019
8020 *value = NULL;
8021 GROW;
8022 name = xmlParseQName(ctxt, prefix);
8023 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008024 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8025 "error parsing attribute name\n");
8026 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008027 }
8028
8029 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008030 * get the type if needed
8031 */
8032 if (ctxt->attsSpecial != NULL) {
8033 int type;
8034
8035 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008036 pref, elem, *prefix, name);
8037 if (type != 0)
8038 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008039 }
8040
8041 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008042 * read the value
8043 */
8044 SKIP_BLANKS;
8045 if (RAW == '=') {
8046 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008047 SKIP_BLANKS;
8048 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8049 if (normalize) {
8050 /*
8051 * Sometimes a second normalisation pass for spaces is needed
8052 * but that only happens if charrefs or entities refernces
8053 * have been used in the attribute value, i.e. the attribute
8054 * value have been extracted in an allocated string already.
8055 */
8056 if (*alloc) {
8057 const xmlChar *val2;
8058
8059 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8060 if (val2 != NULL) {
8061 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008062 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008063 }
8064 }
8065 }
8066 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008067 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008068 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8069 "Specification mandate value for attribute %s\n",
8070 name);
8071 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008072 }
8073
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008074 if (*prefix == ctxt->str_xml) {
8075 /*
8076 * Check that xml:lang conforms to the specification
8077 * No more registered as an error, just generate a warning now
8078 * since this was deprecated in XML second edition
8079 */
8080 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8081 internal_val = xmlStrndup(val, *len);
8082 if (!xmlCheckLanguageID(internal_val)) {
8083 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8084 "Malformed value for xml:lang : %s\n",
8085 internal_val, NULL);
8086 }
8087 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008088
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008089 /*
8090 * Check that xml:space conforms to the specification
8091 */
8092 if (xmlStrEqual(name, BAD_CAST "space")) {
8093 internal_val = xmlStrndup(val, *len);
8094 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8095 *(ctxt->space) = 0;
8096 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8097 *(ctxt->space) = 1;
8098 else {
8099 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8100 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8101 internal_val, NULL);
8102 }
8103 }
8104 if (internal_val) {
8105 xmlFree(internal_val);
8106 }
8107 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008108
8109 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008110 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008111}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008112/**
8113 * xmlParseStartTag2:
8114 * @ctxt: an XML parser context
8115 *
8116 * parse a start of tag either for rule element or
8117 * EmptyElement. In both case we don't parse the tag closing chars.
8118 * This routine is called when running SAX2 parsing
8119 *
8120 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8121 *
8122 * [ WFC: Unique Att Spec ]
8123 * No attribute name may appear more than once in the same start-tag or
8124 * empty-element tag.
8125 *
8126 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8127 *
8128 * [ WFC: Unique Att Spec ]
8129 * No attribute name may appear more than once in the same start-tag or
8130 * empty-element tag.
8131 *
8132 * With namespace:
8133 *
8134 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8135 *
8136 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8137 *
8138 * Returns the element name parsed
8139 */
8140
8141static const xmlChar *
8142xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008143 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008144 const xmlChar *localname;
8145 const xmlChar *prefix;
8146 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008147 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008148 const xmlChar *nsname;
8149 xmlChar *attvalue;
8150 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008151 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008152 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008153 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008154 const xmlChar *base;
8155 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008156 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008157
8158 if (RAW != '<') return(NULL);
8159 NEXT1;
8160
8161 /*
8162 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8163 * point since the attribute values may be stored as pointers to
8164 * the buffer and calling SHRINK would destroy them !
8165 * The Shrinking is only possible once the full set of attribute
8166 * callbacks have been done.
8167 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008168reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008169 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008170 base = ctxt->input->base;
8171 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008172 oldline = ctxt->input->line;
8173 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008174 nbatts = 0;
8175 nratts = 0;
8176 nbdef = 0;
8177 nbNs = 0;
8178 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008179 /* Forget any namespaces added during an earlier parse of this element. */
8180 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181
8182 localname = xmlParseQName(ctxt, &prefix);
8183 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008184 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8185 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008186 return(NULL);
8187 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008188 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008189
8190 /*
8191 * Now parse the attributes, it ends up with the ending
8192 *
8193 * (S Attribute)* S?
8194 */
8195 SKIP_BLANKS;
8196 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008197 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008198
8199 while ((RAW != '>') &&
8200 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008201 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008202 const xmlChar *q = CUR_PTR;
8203 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008204 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008206 attname = xmlParseAttribute2(ctxt, prefix, localname,
8207 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008208 if (ctxt->input->base != base) {
8209 if ((attvalue != NULL) && (alloc != 0))
8210 xmlFree(attvalue);
8211 attvalue = NULL;
8212 goto base_changed;
8213 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008214 if ((attname != NULL) && (attvalue != NULL)) {
8215 if (len < 0) len = xmlStrlen(attvalue);
8216 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008217 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8218 xmlURIPtr uri;
8219
8220 if (*URL != 0) {
8221 uri = xmlParseURI((const char *) URL);
8222 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008223 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8224 "xmlns: %s not a valid URI\n",
8225 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008226 } else {
8227 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008228 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8229 "xmlns: URI %s is not absolute\n",
8230 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008231 }
8232 xmlFreeURI(uri);
8233 }
8234 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008235 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008236 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008237 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008238 for (j = 1;j <= nbNs;j++)
8239 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8240 break;
8241 if (j <= nbNs)
8242 xmlErrAttributeDup(ctxt, NULL, attname);
8243 else
8244 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008245 if (alloc != 0) xmlFree(attvalue);
8246 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008247 continue;
8248 }
8249 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8251 xmlURIPtr uri;
8252
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008253 if (attname == ctxt->str_xml) {
8254 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008255 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8256 "xml namespace prefix mapped to wrong URI\n",
8257 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008258 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008259 /*
8260 * Do not keep a namespace definition node
8261 */
8262 if (alloc != 0) xmlFree(attvalue);
8263 SKIP_BLANKS;
8264 continue;
8265 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008266 uri = xmlParseURI((const char *) URL);
8267 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008268 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8269 "xmlns:%s: '%s' is not a valid URI\n",
8270 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008271 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008272 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008273 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8274 "xmlns:%s: URI %s is not absolute\n",
8275 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008276 }
8277 xmlFreeURI(uri);
8278 }
8279
Daniel Veillard0fb18932003-09-07 09:14:37 +00008280 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008281 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008282 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008283 for (j = 1;j <= nbNs;j++)
8284 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8285 break;
8286 if (j <= nbNs)
8287 xmlErrAttributeDup(ctxt, aprefix, attname);
8288 else
8289 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008290 if (alloc != 0) xmlFree(attvalue);
8291 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008292 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008293 continue;
8294 }
8295
8296 /*
8297 * Add the pair to atts
8298 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008299 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8300 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008301 if (attvalue[len] == 0)
8302 xmlFree(attvalue);
8303 goto failed;
8304 }
8305 maxatts = ctxt->maxatts;
8306 atts = ctxt->atts;
8307 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008308 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008309 atts[nbatts++] = attname;
8310 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008311 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008312 atts[nbatts++] = attvalue;
8313 attvalue += len;
8314 atts[nbatts++] = attvalue;
8315 /*
8316 * tag if some deallocation is needed
8317 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008318 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008319 } else {
8320 if ((attvalue != NULL) && (attvalue[len] == 0))
8321 xmlFree(attvalue);
8322 }
8323
8324failed:
8325
8326 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008327 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008328 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8329 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008330 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8332 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008333 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008334 }
8335 SKIP_BLANKS;
8336 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8337 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008338 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008339 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008340 break;
8341 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008342 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008343 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008344 }
8345
Daniel Veillard0fb18932003-09-07 09:14:37 +00008346 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008347 * The attributes defaulting
8348 */
8349 if (ctxt->attsDefault != NULL) {
8350 xmlDefAttrsPtr defaults;
8351
8352 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8353 if (defaults != NULL) {
8354 for (i = 0;i < defaults->nbAttrs;i++) {
8355 attname = defaults->values[4 * i];
8356 aprefix = defaults->values[4 * i + 1];
8357
8358 /*
8359 * special work for namespaces defaulted defs
8360 */
8361 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8362 /*
8363 * check that it's not a defined namespace
8364 */
8365 for (j = 1;j <= nbNs;j++)
8366 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8367 break;
8368 if (j <= nbNs) continue;
8369
8370 nsname = xmlGetNamespace(ctxt, NULL);
8371 if (nsname != defaults->values[4 * i + 2]) {
8372 if (nsPush(ctxt, NULL,
8373 defaults->values[4 * i + 2]) > 0)
8374 nbNs++;
8375 }
8376 } else if (aprefix == ctxt->str_xmlns) {
8377 /*
8378 * check that it's not a defined namespace
8379 */
8380 for (j = 1;j <= nbNs;j++)
8381 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8382 break;
8383 if (j <= nbNs) continue;
8384
8385 nsname = xmlGetNamespace(ctxt, attname);
8386 if (nsname != defaults->values[2]) {
8387 if (nsPush(ctxt, attname,
8388 defaults->values[4 * i + 2]) > 0)
8389 nbNs++;
8390 }
8391 } else {
8392 /*
8393 * check that it's not a defined attribute
8394 */
8395 for (j = 0;j < nbatts;j+=5) {
8396 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8397 break;
8398 }
8399 if (j < nbatts) continue;
8400
8401 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8402 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008403 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008404 }
8405 maxatts = ctxt->maxatts;
8406 atts = ctxt->atts;
8407 }
8408 atts[nbatts++] = attname;
8409 atts[nbatts++] = aprefix;
8410 if (aprefix == NULL)
8411 atts[nbatts++] = NULL;
8412 else
8413 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8414 atts[nbatts++] = defaults->values[4 * i + 2];
8415 atts[nbatts++] = defaults->values[4 * i + 3];
8416 nbdef++;
8417 }
8418 }
8419 }
8420 }
8421
Daniel Veillarde70c8772003-11-25 07:21:18 +00008422 /*
8423 * The attributes checkings
8424 */
8425 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008426 /*
8427 * The default namespace does not apply to attribute names.
8428 */
8429 if (atts[i + 1] != NULL) {
8430 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8431 if (nsname == NULL) {
8432 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8433 "Namespace prefix %s for %s on %s is not defined\n",
8434 atts[i + 1], atts[i], localname);
8435 }
8436 atts[i + 2] = nsname;
8437 } else
8438 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008439 /*
8440 * [ WFC: Unique Att Spec ]
8441 * No attribute name may appear more than once in the same
8442 * start-tag or empty-element tag.
8443 * As extended by the Namespace in XML REC.
8444 */
8445 for (j = 0; j < i;j += 5) {
8446 if (atts[i] == atts[j]) {
8447 if (atts[i+1] == atts[j+1]) {
8448 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8449 break;
8450 }
8451 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8452 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8453 "Namespaced Attribute %s in '%s' redefined\n",
8454 atts[i], nsname, NULL);
8455 break;
8456 }
8457 }
8458 }
8459 }
8460
Daniel Veillarde57ec792003-09-10 10:50:59 +00008461 nsname = xmlGetNamespace(ctxt, prefix);
8462 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008463 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8464 "Namespace prefix %s on %s is not defined\n",
8465 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008466 }
8467 *pref = prefix;
8468 *URI = nsname;
8469
8470 /*
8471 * SAX: Start of Element !
8472 */
8473 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8474 (!ctxt->disableSAX)) {
8475 if (nbNs > 0)
8476 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8477 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8478 nbatts / 5, nbdef, atts);
8479 else
8480 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8481 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8482 }
8483
8484 /*
8485 * Free up attribute allocated strings if needed
8486 */
8487 if (attval != 0) {
8488 for (i = 3,j = 0; j < nratts;i += 5,j++)
8489 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8490 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008491 }
8492
8493 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008494
8495base_changed:
8496 /*
8497 * the attribute strings are valid iif the base didn't changed
8498 */
8499 if (attval != 0) {
8500 for (i = 3,j = 0; j < nratts;i += 5,j++)
8501 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8502 xmlFree((xmlChar *) atts[i]);
8503 }
8504 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008505 ctxt->input->line = oldline;
8506 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008507 if (ctxt->wellFormed == 1) {
8508 goto reparse;
8509 }
8510 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008511}
8512
8513/**
8514 * xmlParseEndTag2:
8515 * @ctxt: an XML parser context
8516 * @line: line of the start tag
8517 * @nsNr: number of namespaces on the start tag
8518 *
8519 * parse an end of tag
8520 *
8521 * [42] ETag ::= '</' Name S? '>'
8522 *
8523 * With namespace
8524 *
8525 * [NS 9] ETag ::= '</' QName S? '>'
8526 */
8527
8528static void
8529xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008530 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008531 const xmlChar *name;
8532
8533 GROW;
8534 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008535 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008536 return;
8537 }
8538 SKIP(2);
8539
William M. Brack13dfa872004-09-18 04:52:08 +00008540 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008541 if (ctxt->input->cur[tlen] == '>') {
8542 ctxt->input->cur += tlen + 1;
8543 goto done;
8544 }
8545 ctxt->input->cur += tlen;
8546 name = (xmlChar*)1;
8547 } else {
8548 if (prefix == NULL)
8549 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8550 else
8551 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8552 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008553
8554 /*
8555 * We should definitely be at the ending "S? '>'" part
8556 */
8557 GROW;
8558 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008559 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008560 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008561 } else
8562 NEXT1;
8563
8564 /*
8565 * [ WFC: Element Type Match ]
8566 * The Name in an element's end-tag must match the element type in the
8567 * start-tag.
8568 *
8569 */
8570 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008571 if (name == NULL) name = BAD_CAST "unparseable";
8572 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008574 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008575 }
8576
8577 /*
8578 * SAX: End of Tag
8579 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008580done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008581 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8582 (!ctxt->disableSAX))
8583 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8584
Daniel Veillard0fb18932003-09-07 09:14:37 +00008585 spacePop(ctxt);
8586 if (nsNr != 0)
8587 nsPop(ctxt, nsNr);
8588 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008589}
8590
8591/**
Owen Taylor3473f882001-02-23 17:55:21 +00008592 * xmlParseCDSect:
8593 * @ctxt: an XML parser context
8594 *
8595 * Parse escaped pure raw content.
8596 *
8597 * [18] CDSect ::= CDStart CData CDEnd
8598 *
8599 * [19] CDStart ::= '<![CDATA['
8600 *
8601 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8602 *
8603 * [21] CDEnd ::= ']]>'
8604 */
8605void
8606xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8607 xmlChar *buf = NULL;
8608 int len = 0;
8609 int size = XML_PARSER_BUFFER_SIZE;
8610 int r, rl;
8611 int s, sl;
8612 int cur, l;
8613 int count = 0;
8614
Daniel Veillard8f597c32003-10-06 08:19:27 +00008615 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008616 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008617 SKIP(9);
8618 } else
8619 return;
8620
8621 ctxt->instate = XML_PARSER_CDATA_SECTION;
8622 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008623 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008624 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 ctxt->instate = XML_PARSER_CONTENT;
8626 return;
8627 }
8628 NEXTL(rl);
8629 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008630 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008631 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008632 ctxt->instate = XML_PARSER_CONTENT;
8633 return;
8634 }
8635 NEXTL(sl);
8636 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008637 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008638 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008639 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008640 return;
8641 }
William M. Brack871611b2003-10-18 04:53:14 +00008642 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008643 ((r != ']') || (s != ']') || (cur != '>'))) {
8644 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008645 xmlChar *tmp;
8646
Owen Taylor3473f882001-02-23 17:55:21 +00008647 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008648 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8649 if (tmp == NULL) {
8650 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008651 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008652 return;
8653 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008654 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008655 }
8656 COPY_BUF(rl,buf,len,r);
8657 r = s;
8658 rl = sl;
8659 s = cur;
8660 sl = l;
8661 count++;
8662 if (count > 50) {
8663 GROW;
8664 count = 0;
8665 }
8666 NEXTL(l);
8667 cur = CUR_CHAR(l);
8668 }
8669 buf[len] = 0;
8670 ctxt->instate = XML_PARSER_CONTENT;
8671 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008672 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008673 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008674 xmlFree(buf);
8675 return;
8676 }
8677 NEXTL(l);
8678
8679 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008680 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008681 */
8682 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8683 if (ctxt->sax->cdataBlock != NULL)
8684 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008685 else if (ctxt->sax->characters != NULL)
8686 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008687 }
8688 xmlFree(buf);
8689}
8690
8691/**
8692 * xmlParseContent:
8693 * @ctxt: an XML parser context
8694 *
8695 * Parse a content:
8696 *
8697 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8698 */
8699
8700void
8701xmlParseContent(xmlParserCtxtPtr ctxt) {
8702 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008703 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008704 ((RAW != '<') || (NXT(1) != '/')) &&
8705 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008706 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008707 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008708 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008709
8710 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008711 * First case : a Processing Instruction.
8712 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008713 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008714 xmlParsePI(ctxt);
8715 }
8716
8717 /*
8718 * Second case : a CDSection
8719 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008720 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008721 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008722 xmlParseCDSect(ctxt);
8723 }
8724
8725 /*
8726 * Third case : a comment
8727 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008728 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008729 (NXT(2) == '-') && (NXT(3) == '-')) {
8730 xmlParseComment(ctxt);
8731 ctxt->instate = XML_PARSER_CONTENT;
8732 }
8733
8734 /*
8735 * Fourth case : a sub-element.
8736 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008737 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008738 xmlParseElement(ctxt);
8739 }
8740
8741 /*
8742 * Fifth case : a reference. If if has not been resolved,
8743 * parsing returns it's Name, create the node
8744 */
8745
Daniel Veillard21a0f912001-02-25 19:54:14 +00008746 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008747 xmlParseReference(ctxt);
8748 }
8749
8750 /*
8751 * Last case, text. Note that References are handled directly.
8752 */
8753 else {
8754 xmlParseCharData(ctxt, 0);
8755 }
8756
8757 GROW;
8758 /*
8759 * Pop-up of finished entities.
8760 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008761 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008762 xmlPopInput(ctxt);
8763 SHRINK;
8764
Daniel Veillardfdc91562002-07-01 21:52:03 +00008765 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008766 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8767 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008768 ctxt->instate = XML_PARSER_EOF;
8769 break;
8770 }
8771 }
8772}
8773
8774/**
8775 * xmlParseElement:
8776 * @ctxt: an XML parser context
8777 *
8778 * parse an XML element, this is highly recursive
8779 *
8780 * [39] element ::= EmptyElemTag | STag content ETag
8781 *
8782 * [ WFC: Element Type Match ]
8783 * The Name in an element's end-tag must match the element type in the
8784 * start-tag.
8785 *
Owen Taylor3473f882001-02-23 17:55:21 +00008786 */
8787
8788void
8789xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008790 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008791 const xmlChar *prefix;
8792 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008793 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008794 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008795 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008796 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008797
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008798 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8799 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8800 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8801 xmlParserMaxDepth);
8802 ctxt->instate = XML_PARSER_EOF;
8803 return;
8804 }
8805
Owen Taylor3473f882001-02-23 17:55:21 +00008806 /* Capture start position */
8807 if (ctxt->record_info) {
8808 node_info.begin_pos = ctxt->input->consumed +
8809 (CUR_PTR - ctxt->input->base);
8810 node_info.begin_line = ctxt->input->line;
8811 }
8812
8813 if (ctxt->spaceNr == 0)
8814 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008815 else if (*ctxt->space == -2)
8816 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008817 else
8818 spacePush(ctxt, *ctxt->space);
8819
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008820 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008821#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008822 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008823#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008824 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008825#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008826 else
8827 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008828#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008829 if (name == NULL) {
8830 spacePop(ctxt);
8831 return;
8832 }
8833 namePush(ctxt, name);
8834 ret = ctxt->node;
8835
Daniel Veillard4432df22003-09-28 18:58:27 +00008836#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008837 /*
8838 * [ VC: Root Element Type ]
8839 * The Name in the document type declaration must match the element
8840 * type of the root element.
8841 */
8842 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8843 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8844 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008845#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008846
8847 /*
8848 * Check for an Empty Element.
8849 */
8850 if ((RAW == '/') && (NXT(1) == '>')) {
8851 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852 if (ctxt->sax2) {
8853 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8854 (!ctxt->disableSAX))
8855 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008856#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008857 } else {
8858 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8859 (!ctxt->disableSAX))
8860 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008861#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008862 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008863 namePop(ctxt);
8864 spacePop(ctxt);
8865 if (nsNr != ctxt->nsNr)
8866 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008867 if ( ret != NULL && ctxt->record_info ) {
8868 node_info.end_pos = ctxt->input->consumed +
8869 (CUR_PTR - ctxt->input->base);
8870 node_info.end_line = ctxt->input->line;
8871 node_info.node = ret;
8872 xmlParserAddNodeInfo(ctxt, &node_info);
8873 }
8874 return;
8875 }
8876 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008877 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008878 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008879 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8880 "Couldn't find end of Start Tag %s line %d\n",
8881 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008882
8883 /*
8884 * end of parsing of this node.
8885 */
8886 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008888 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 if (nsNr != ctxt->nsNr)
8890 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008891
8892 /*
8893 * Capture end position and add node
8894 */
8895 if ( ret != NULL && ctxt->record_info ) {
8896 node_info.end_pos = ctxt->input->consumed +
8897 (CUR_PTR - ctxt->input->base);
8898 node_info.end_line = ctxt->input->line;
8899 node_info.node = ret;
8900 xmlParserAddNodeInfo(ctxt, &node_info);
8901 }
8902 return;
8903 }
8904
8905 /*
8906 * Parse the content of the element:
8907 */
8908 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008909 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008910 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008911 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008912 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008913
8914 /*
8915 * end of parsing of this node.
8916 */
8917 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008919 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008920 if (nsNr != ctxt->nsNr)
8921 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008922 return;
8923 }
8924
8925 /*
8926 * parse the end of tag: '</' should be here.
8927 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008928 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008929 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008930 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008931 }
8932#ifdef LIBXML_SAX1_ENABLED
8933 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008935#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008936
8937 /*
8938 * Capture end position and add node
8939 */
8940 if ( ret != NULL && ctxt->record_info ) {
8941 node_info.end_pos = ctxt->input->consumed +
8942 (CUR_PTR - ctxt->input->base);
8943 node_info.end_line = ctxt->input->line;
8944 node_info.node = ret;
8945 xmlParserAddNodeInfo(ctxt, &node_info);
8946 }
8947}
8948
8949/**
8950 * xmlParseVersionNum:
8951 * @ctxt: an XML parser context
8952 *
8953 * parse the XML version value.
8954 *
8955 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8956 *
8957 * Returns the string giving the XML version number, or NULL
8958 */
8959xmlChar *
8960xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8961 xmlChar *buf = NULL;
8962 int len = 0;
8963 int size = 10;
8964 xmlChar cur;
8965
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008967 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008968 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008969 return(NULL);
8970 }
8971 cur = CUR;
8972 while (((cur >= 'a') && (cur <= 'z')) ||
8973 ((cur >= 'A') && (cur <= 'Z')) ||
8974 ((cur >= '0') && (cur <= '9')) ||
8975 (cur == '_') || (cur == '.') ||
8976 (cur == ':') || (cur == '-')) {
8977 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008978 xmlChar *tmp;
8979
Owen Taylor3473f882001-02-23 17:55:21 +00008980 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008981 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8982 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008983 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008984 return(NULL);
8985 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008986 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008987 }
8988 buf[len++] = cur;
8989 NEXT;
8990 cur=CUR;
8991 }
8992 buf[len] = 0;
8993 return(buf);
8994}
8995
8996/**
8997 * xmlParseVersionInfo:
8998 * @ctxt: an XML parser context
8999 *
9000 * parse the XML version.
9001 *
9002 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9003 *
9004 * [25] Eq ::= S? '=' S?
9005 *
9006 * Returns the version string, e.g. "1.0"
9007 */
9008
9009xmlChar *
9010xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9011 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009012
Daniel Veillarda07050d2003-10-19 14:46:32 +00009013 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009014 SKIP(7);
9015 SKIP_BLANKS;
9016 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009017 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009018 return(NULL);
9019 }
9020 NEXT;
9021 SKIP_BLANKS;
9022 if (RAW == '"') {
9023 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009024 version = xmlParseVersionNum(ctxt);
9025 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009026 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009027 } else
9028 NEXT;
9029 } else if (RAW == '\''){
9030 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009031 version = xmlParseVersionNum(ctxt);
9032 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009033 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009034 } else
9035 NEXT;
9036 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009037 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009038 }
9039 }
9040 return(version);
9041}
9042
9043/**
9044 * xmlParseEncName:
9045 * @ctxt: an XML parser context
9046 *
9047 * parse the XML encoding name
9048 *
9049 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9050 *
9051 * Returns the encoding name value or NULL
9052 */
9053xmlChar *
9054xmlParseEncName(xmlParserCtxtPtr ctxt) {
9055 xmlChar *buf = NULL;
9056 int len = 0;
9057 int size = 10;
9058 xmlChar cur;
9059
9060 cur = CUR;
9061 if (((cur >= 'a') && (cur <= 'z')) ||
9062 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009063 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009064 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009065 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009066 return(NULL);
9067 }
9068
9069 buf[len++] = cur;
9070 NEXT;
9071 cur = CUR;
9072 while (((cur >= 'a') && (cur <= 'z')) ||
9073 ((cur >= 'A') && (cur <= 'Z')) ||
9074 ((cur >= '0') && (cur <= '9')) ||
9075 (cur == '.') || (cur == '_') ||
9076 (cur == '-')) {
9077 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009078 xmlChar *tmp;
9079
Owen Taylor3473f882001-02-23 17:55:21 +00009080 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009081 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9082 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009083 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009084 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009085 return(NULL);
9086 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009087 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009088 }
9089 buf[len++] = cur;
9090 NEXT;
9091 cur = CUR;
9092 if (cur == 0) {
9093 SHRINK;
9094 GROW;
9095 cur = CUR;
9096 }
9097 }
9098 buf[len] = 0;
9099 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009100 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009101 }
9102 return(buf);
9103}
9104
9105/**
9106 * xmlParseEncodingDecl:
9107 * @ctxt: an XML parser context
9108 *
9109 * parse the XML encoding declaration
9110 *
9111 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9112 *
9113 * this setups the conversion filters.
9114 *
9115 * Returns the encoding value or NULL
9116 */
9117
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009118const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009119xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9120 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009121
9122 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009123 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009124 SKIP(8);
9125 SKIP_BLANKS;
9126 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009127 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009128 return(NULL);
9129 }
9130 NEXT;
9131 SKIP_BLANKS;
9132 if (RAW == '"') {
9133 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009134 encoding = xmlParseEncName(ctxt);
9135 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009136 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009137 } else
9138 NEXT;
9139 } else if (RAW == '\''){
9140 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009141 encoding = xmlParseEncName(ctxt);
9142 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009143 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009144 } else
9145 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009146 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009148 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009149 /*
9150 * UTF-16 encoding stwich has already taken place at this stage,
9151 * more over the little-endian/big-endian selection is already done
9152 */
9153 if ((encoding != NULL) &&
9154 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9155 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009156 if (ctxt->encoding != NULL)
9157 xmlFree((xmlChar *) ctxt->encoding);
9158 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009159 }
9160 /*
9161 * UTF-8 encoding is handled natively
9162 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009163 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009164 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9165 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009166 if (ctxt->encoding != NULL)
9167 xmlFree((xmlChar *) ctxt->encoding);
9168 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009169 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009170 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009171 xmlCharEncodingHandlerPtr handler;
9172
9173 if (ctxt->input->encoding != NULL)
9174 xmlFree((xmlChar *) ctxt->input->encoding);
9175 ctxt->input->encoding = encoding;
9176
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009177 handler = xmlFindCharEncodingHandler((const char *) encoding);
9178 if (handler != NULL) {
9179 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009180 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009181 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009182 "Unsupported encoding %s\n", encoding);
9183 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009184 }
9185 }
9186 }
9187 return(encoding);
9188}
9189
9190/**
9191 * xmlParseSDDecl:
9192 * @ctxt: an XML parser context
9193 *
9194 * parse the XML standalone declaration
9195 *
9196 * [32] SDDecl ::= S 'standalone' Eq
9197 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9198 *
9199 * [ VC: Standalone Document Declaration ]
9200 * TODO The standalone document declaration must have the value "no"
9201 * if any external markup declarations contain declarations of:
9202 * - attributes with default values, if elements to which these
9203 * attributes apply appear in the document without specifications
9204 * of values for these attributes, or
9205 * - entities (other than amp, lt, gt, apos, quot), if references
9206 * to those entities appear in the document, or
9207 * - attributes with values subject to normalization, where the
9208 * attribute appears in the document with a value which will change
9209 * as a result of normalization, or
9210 * - element types with element content, if white space occurs directly
9211 * within any instance of those types.
9212 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009213 * Returns:
9214 * 1 if standalone="yes"
9215 * 0 if standalone="no"
9216 * -2 if standalone attribute is missing or invalid
9217 * (A standalone value of -2 means that the XML declaration was found,
9218 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009219 */
9220
9221int
9222xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009223 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009224
9225 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009226 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009227 SKIP(10);
9228 SKIP_BLANKS;
9229 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009230 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009231 return(standalone);
9232 }
9233 NEXT;
9234 SKIP_BLANKS;
9235 if (RAW == '\''){
9236 NEXT;
9237 if ((RAW == 'n') && (NXT(1) == 'o')) {
9238 standalone = 0;
9239 SKIP(2);
9240 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9241 (NXT(2) == 's')) {
9242 standalone = 1;
9243 SKIP(3);
9244 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009245 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009246 }
9247 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009248 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009249 } else
9250 NEXT;
9251 } else if (RAW == '"'){
9252 NEXT;
9253 if ((RAW == 'n') && (NXT(1) == 'o')) {
9254 standalone = 0;
9255 SKIP(2);
9256 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9257 (NXT(2) == 's')) {
9258 standalone = 1;
9259 SKIP(3);
9260 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009261 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009262 }
9263 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009264 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009265 } else
9266 NEXT;
9267 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009269 }
9270 }
9271 return(standalone);
9272}
9273
9274/**
9275 * xmlParseXMLDecl:
9276 * @ctxt: an XML parser context
9277 *
9278 * parse an XML declaration header
9279 *
9280 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9281 */
9282
9283void
9284xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9285 xmlChar *version;
9286
9287 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009288 * This value for standalone indicates that the document has an
9289 * XML declaration but it does not have a standalone attribute.
9290 * It will be overwritten later if a standalone attribute is found.
9291 */
9292 ctxt->input->standalone = -2;
9293
9294 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009295 * We know that '<?xml' is here.
9296 */
9297 SKIP(5);
9298
William M. Brack76e95df2003-10-18 16:20:14 +00009299 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009300 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9301 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009302 }
9303 SKIP_BLANKS;
9304
9305 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009306 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009307 */
9308 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009309 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009310 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009311 } else {
9312 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9313 /*
9314 * TODO: Blueberry should be detected here
9315 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009316 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9317 "Unsupported version '%s'\n",
9318 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009319 }
9320 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009321 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009322 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009323 }
Owen Taylor3473f882001-02-23 17:55:21 +00009324
9325 /*
9326 * We may have the encoding declaration
9327 */
William M. Brack76e95df2003-10-18 16:20:14 +00009328 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009329 if ((RAW == '?') && (NXT(1) == '>')) {
9330 SKIP(2);
9331 return;
9332 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009333 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009334 }
9335 xmlParseEncodingDecl(ctxt);
9336 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9337 /*
9338 * The XML REC instructs us to stop parsing right here
9339 */
9340 return;
9341 }
9342
9343 /*
9344 * We may have the standalone status.
9345 */
William M. Brack76e95df2003-10-18 16:20:14 +00009346 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009347 if ((RAW == '?') && (NXT(1) == '>')) {
9348 SKIP(2);
9349 return;
9350 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009352 }
9353 SKIP_BLANKS;
9354 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9355
9356 SKIP_BLANKS;
9357 if ((RAW == '?') && (NXT(1) == '>')) {
9358 SKIP(2);
9359 } else if (RAW == '>') {
9360 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009361 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009362 NEXT;
9363 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009364 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009365 MOVETO_ENDTAG(CUR_PTR);
9366 NEXT;
9367 }
9368}
9369
9370/**
9371 * xmlParseMisc:
9372 * @ctxt: an XML parser context
9373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009374 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009375 *
9376 * [27] Misc ::= Comment | PI | S
9377 */
9378
9379void
9380xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009381 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009382 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009383 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009384 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009385 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009386 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009387 NEXT;
9388 } else
9389 xmlParseComment(ctxt);
9390 }
9391}
9392
9393/**
9394 * xmlParseDocument:
9395 * @ctxt: an XML parser context
9396 *
9397 * parse an XML document (and build a tree if using the standard SAX
9398 * interface).
9399 *
9400 * [1] document ::= prolog element Misc*
9401 *
9402 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9403 *
9404 * Returns 0, -1 in case of error. the parser context is augmented
9405 * as a result of the parsing.
9406 */
9407
9408int
9409xmlParseDocument(xmlParserCtxtPtr ctxt) {
9410 xmlChar start[4];
9411 xmlCharEncoding enc;
9412
9413 xmlInitParser();
9414
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009415 if ((ctxt == NULL) || (ctxt->input == NULL))
9416 return(-1);
9417
Owen Taylor3473f882001-02-23 17:55:21 +00009418 GROW;
9419
9420 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009421 * SAX: detecting the level.
9422 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009423 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009424
9425 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009426 * SAX: beginning of the document processing.
9427 */
9428 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9429 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9430
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009431 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9432 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009433 /*
9434 * Get the 4 first bytes and decode the charset
9435 * if enc != XML_CHAR_ENCODING_NONE
9436 * plug some encoding conversion routines.
9437 */
9438 start[0] = RAW;
9439 start[1] = NXT(1);
9440 start[2] = NXT(2);
9441 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009442 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009443 if (enc != XML_CHAR_ENCODING_NONE) {
9444 xmlSwitchEncoding(ctxt, enc);
9445 }
Owen Taylor3473f882001-02-23 17:55:21 +00009446 }
9447
9448
9449 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009450 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009451 }
9452
9453 /*
9454 * Check for the XMLDecl in the Prolog.
9455 */
9456 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009457 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009458
9459 /*
9460 * Note that we will switch encoding on the fly.
9461 */
9462 xmlParseXMLDecl(ctxt);
9463 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9464 /*
9465 * The XML REC instructs us to stop parsing right here
9466 */
9467 return(-1);
9468 }
9469 ctxt->standalone = ctxt->input->standalone;
9470 SKIP_BLANKS;
9471 } else {
9472 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9473 }
9474 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9475 ctxt->sax->startDocument(ctxt->userData);
9476
9477 /*
9478 * The Misc part of the Prolog
9479 */
9480 GROW;
9481 xmlParseMisc(ctxt);
9482
9483 /*
9484 * Then possibly doc type declaration(s) and more Misc
9485 * (doctypedecl Misc*)?
9486 */
9487 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009488 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009489
9490 ctxt->inSubset = 1;
9491 xmlParseDocTypeDecl(ctxt);
9492 if (RAW == '[') {
9493 ctxt->instate = XML_PARSER_DTD;
9494 xmlParseInternalSubset(ctxt);
9495 }
9496
9497 /*
9498 * Create and update the external subset.
9499 */
9500 ctxt->inSubset = 2;
9501 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9502 (!ctxt->disableSAX))
9503 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9504 ctxt->extSubSystem, ctxt->extSubURI);
9505 ctxt->inSubset = 0;
9506
Daniel Veillardac4118d2008-01-11 05:27:32 +00009507 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009508
9509 ctxt->instate = XML_PARSER_PROLOG;
9510 xmlParseMisc(ctxt);
9511 }
9512
9513 /*
9514 * Time to start parsing the tree itself
9515 */
9516 GROW;
9517 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009518 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9519 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009520 } else {
9521 ctxt->instate = XML_PARSER_CONTENT;
9522 xmlParseElement(ctxt);
9523 ctxt->instate = XML_PARSER_EPILOG;
9524
9525
9526 /*
9527 * The Misc part at the end
9528 */
9529 xmlParseMisc(ctxt);
9530
Daniel Veillard561b7f82002-03-20 21:55:57 +00009531 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009532 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009533 }
9534 ctxt->instate = XML_PARSER_EOF;
9535 }
9536
9537 /*
9538 * SAX: end of the document processing.
9539 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009540 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009541 ctxt->sax->endDocument(ctxt->userData);
9542
Daniel Veillard5997aca2002-03-18 18:36:20 +00009543 /*
9544 * Remove locally kept entity definitions if the tree was not built
9545 */
9546 if ((ctxt->myDoc != NULL) &&
9547 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9548 xmlFreeDoc(ctxt->myDoc);
9549 ctxt->myDoc = NULL;
9550 }
9551
Daniel Veillardc7612992002-02-17 22:47:37 +00009552 if (! ctxt->wellFormed) {
9553 ctxt->valid = 0;
9554 return(-1);
9555 }
Owen Taylor3473f882001-02-23 17:55:21 +00009556 return(0);
9557}
9558
9559/**
9560 * xmlParseExtParsedEnt:
9561 * @ctxt: an XML parser context
9562 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009563 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009564 * An external general parsed entity is well-formed if it matches the
9565 * production labeled extParsedEnt.
9566 *
9567 * [78] extParsedEnt ::= TextDecl? content
9568 *
9569 * Returns 0, -1 in case of error. the parser context is augmented
9570 * as a result of the parsing.
9571 */
9572
9573int
9574xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9575 xmlChar start[4];
9576 xmlCharEncoding enc;
9577
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009578 if ((ctxt == NULL) || (ctxt->input == NULL))
9579 return(-1);
9580
Owen Taylor3473f882001-02-23 17:55:21 +00009581 xmlDefaultSAXHandlerInit();
9582
Daniel Veillard309f81d2003-09-23 09:02:53 +00009583 xmlDetectSAX2(ctxt);
9584
Owen Taylor3473f882001-02-23 17:55:21 +00009585 GROW;
9586
9587 /*
9588 * SAX: beginning of the document processing.
9589 */
9590 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9591 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9592
9593 /*
9594 * Get the 4 first bytes and decode the charset
9595 * if enc != XML_CHAR_ENCODING_NONE
9596 * plug some encoding conversion routines.
9597 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009598 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9599 start[0] = RAW;
9600 start[1] = NXT(1);
9601 start[2] = NXT(2);
9602 start[3] = NXT(3);
9603 enc = xmlDetectCharEncoding(start, 4);
9604 if (enc != XML_CHAR_ENCODING_NONE) {
9605 xmlSwitchEncoding(ctxt, enc);
9606 }
Owen Taylor3473f882001-02-23 17:55:21 +00009607 }
9608
9609
9610 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009611 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009612 }
9613
9614 /*
9615 * Check for the XMLDecl in the Prolog.
9616 */
9617 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009618 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009619
9620 /*
9621 * Note that we will switch encoding on the fly.
9622 */
9623 xmlParseXMLDecl(ctxt);
9624 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9625 /*
9626 * The XML REC instructs us to stop parsing right here
9627 */
9628 return(-1);
9629 }
9630 SKIP_BLANKS;
9631 } else {
9632 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9633 }
9634 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9635 ctxt->sax->startDocument(ctxt->userData);
9636
9637 /*
9638 * Doing validity checking on chunk doesn't make sense
9639 */
9640 ctxt->instate = XML_PARSER_CONTENT;
9641 ctxt->validate = 0;
9642 ctxt->loadsubset = 0;
9643 ctxt->depth = 0;
9644
9645 xmlParseContent(ctxt);
9646
9647 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009648 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009649 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009650 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009651 }
9652
9653 /*
9654 * SAX: end of the document processing.
9655 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009656 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009657 ctxt->sax->endDocument(ctxt->userData);
9658
9659 if (! ctxt->wellFormed) return(-1);
9660 return(0);
9661}
9662
Daniel Veillard73b013f2003-09-30 12:36:01 +00009663#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009664/************************************************************************
9665 * *
9666 * Progressive parsing interfaces *
9667 * *
9668 ************************************************************************/
9669
9670/**
9671 * xmlParseLookupSequence:
9672 * @ctxt: an XML parser context
9673 * @first: the first char to lookup
9674 * @next: the next char to lookup or zero
9675 * @third: the next char to lookup or zero
9676 *
9677 * Try to find if a sequence (first, next, third) or just (first next) or
9678 * (first) is available in the input stream.
9679 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9680 * to avoid rescanning sequences of bytes, it DOES change the state of the
9681 * parser, do not use liberally.
9682 *
9683 * Returns the index to the current parsing point if the full sequence
9684 * is available, -1 otherwise.
9685 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009686static int
Owen Taylor3473f882001-02-23 17:55:21 +00009687xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9688 xmlChar next, xmlChar third) {
9689 int base, len;
9690 xmlParserInputPtr in;
9691 const xmlChar *buf;
9692
9693 in = ctxt->input;
9694 if (in == NULL) return(-1);
9695 base = in->cur - in->base;
9696 if (base < 0) return(-1);
9697 if (ctxt->checkIndex > base)
9698 base = ctxt->checkIndex;
9699 if (in->buf == NULL) {
9700 buf = in->base;
9701 len = in->length;
9702 } else {
9703 buf = in->buf->buffer->content;
9704 len = in->buf->buffer->use;
9705 }
9706 /* take into account the sequence length */
9707 if (third) len -= 2;
9708 else if (next) len --;
9709 for (;base < len;base++) {
9710 if (buf[base] == first) {
9711 if (third != 0) {
9712 if ((buf[base + 1] != next) ||
9713 (buf[base + 2] != third)) continue;
9714 } else if (next != 0) {
9715 if (buf[base + 1] != next) continue;
9716 }
9717 ctxt->checkIndex = 0;
9718#ifdef DEBUG_PUSH
9719 if (next == 0)
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: lookup '%c' found at %d\n",
9722 first, base);
9723 else if (third == 0)
9724 xmlGenericError(xmlGenericErrorContext,
9725 "PP: lookup '%c%c' found at %d\n",
9726 first, next, base);
9727 else
9728 xmlGenericError(xmlGenericErrorContext,
9729 "PP: lookup '%c%c%c' found at %d\n",
9730 first, next, third, base);
9731#endif
9732 return(base - (in->cur - in->base));
9733 }
9734 }
9735 ctxt->checkIndex = base;
9736#ifdef DEBUG_PUSH
9737 if (next == 0)
9738 xmlGenericError(xmlGenericErrorContext,
9739 "PP: lookup '%c' failed\n", first);
9740 else if (third == 0)
9741 xmlGenericError(xmlGenericErrorContext,
9742 "PP: lookup '%c%c' failed\n", first, next);
9743 else
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: lookup '%c%c%c' failed\n", first, next, third);
9746#endif
9747 return(-1);
9748}
9749
9750/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 * xmlParseGetLasts:
9752 * @ctxt: an XML parser context
9753 * @lastlt: pointer to store the last '<' from the input
9754 * @lastgt: pointer to store the last '>' from the input
9755 *
9756 * Lookup the last < and > in the current chunk
9757 */
9758static void
9759xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9760 const xmlChar **lastgt) {
9761 const xmlChar *tmp;
9762
9763 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9764 xmlGenericError(xmlGenericErrorContext,
9765 "Internal error: xmlParseGetLasts\n");
9766 return;
9767 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009768 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009769 tmp = ctxt->input->end;
9770 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009771 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009772 if (tmp < ctxt->input->base) {
9773 *lastlt = NULL;
9774 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009775 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009776 *lastlt = tmp;
9777 tmp++;
9778 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9779 if (*tmp == '\'') {
9780 tmp++;
9781 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9782 if (tmp < ctxt->input->end) tmp++;
9783 } else if (*tmp == '"') {
9784 tmp++;
9785 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9786 if (tmp < ctxt->input->end) tmp++;
9787 } else
9788 tmp++;
9789 }
9790 if (tmp < ctxt->input->end)
9791 *lastgt = tmp;
9792 else {
9793 tmp = *lastlt;
9794 tmp--;
9795 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9796 if (tmp >= ctxt->input->base)
9797 *lastgt = tmp;
9798 else
9799 *lastgt = NULL;
9800 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009801 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009802 } else {
9803 *lastlt = NULL;
9804 *lastgt = NULL;
9805 }
9806}
9807/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009808 * xmlCheckCdataPush:
9809 * @cur: pointer to the bock of characters
9810 * @len: length of the block in bytes
9811 *
9812 * Check that the block of characters is okay as SCdata content [20]
9813 *
9814 * Returns the number of bytes to pass if okay, a negative index where an
9815 * UTF-8 error occured otherwise
9816 */
9817static int
9818xmlCheckCdataPush(const xmlChar *utf, int len) {
9819 int ix;
9820 unsigned char c;
9821 int codepoint;
9822
9823 if ((utf == NULL) || (len <= 0))
9824 return(0);
9825
9826 for (ix = 0; ix < len;) { /* string is 0-terminated */
9827 c = utf[ix];
9828 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9829 if (c >= 0x20)
9830 ix++;
9831 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9832 ix++;
9833 else
9834 return(-ix);
9835 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9836 if (ix + 2 > len) return(ix);
9837 if ((utf[ix+1] & 0xc0 ) != 0x80)
9838 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009839 codepoint = (utf[ix] & 0x1f) << 6;
9840 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009841 if (!xmlIsCharQ(codepoint))
9842 return(-ix);
9843 ix += 2;
9844 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9845 if (ix + 3 > len) return(ix);
9846 if (((utf[ix+1] & 0xc0) != 0x80) ||
9847 ((utf[ix+2] & 0xc0) != 0x80))
9848 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009849 codepoint = (utf[ix] & 0xf) << 12;
9850 codepoint |= (utf[ix+1] & 0x3f) << 6;
9851 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009852 if (!xmlIsCharQ(codepoint))
9853 return(-ix);
9854 ix += 3;
9855 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9856 if (ix + 4 > len) return(ix);
9857 if (((utf[ix+1] & 0xc0) != 0x80) ||
9858 ((utf[ix+2] & 0xc0) != 0x80) ||
9859 ((utf[ix+3] & 0xc0) != 0x80))
9860 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009861 codepoint = (utf[ix] & 0x7) << 18;
9862 codepoint |= (utf[ix+1] & 0x3f) << 12;
9863 codepoint |= (utf[ix+2] & 0x3f) << 6;
9864 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009865 if (!xmlIsCharQ(codepoint))
9866 return(-ix);
9867 ix += 4;
9868 } else /* unknown encoding */
9869 return(-ix);
9870 }
9871 return(ix);
9872}
9873
9874/**
Owen Taylor3473f882001-02-23 17:55:21 +00009875 * xmlParseTryOrFinish:
9876 * @ctxt: an XML parser context
9877 * @terminate: last chunk indicator
9878 *
9879 * Try to progress on parsing
9880 *
9881 * Returns zero if no parsing was possible
9882 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009883static int
Owen Taylor3473f882001-02-23 17:55:21 +00009884xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9885 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009886 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009887 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009888 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009889
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009890 if (ctxt->input == NULL)
9891 return(0);
9892
Owen Taylor3473f882001-02-23 17:55:21 +00009893#ifdef DEBUG_PUSH
9894 switch (ctxt->instate) {
9895 case XML_PARSER_EOF:
9896 xmlGenericError(xmlGenericErrorContext,
9897 "PP: try EOF\n"); break;
9898 case XML_PARSER_START:
9899 xmlGenericError(xmlGenericErrorContext,
9900 "PP: try START\n"); break;
9901 case XML_PARSER_MISC:
9902 xmlGenericError(xmlGenericErrorContext,
9903 "PP: try MISC\n");break;
9904 case XML_PARSER_COMMENT:
9905 xmlGenericError(xmlGenericErrorContext,
9906 "PP: try COMMENT\n");break;
9907 case XML_PARSER_PROLOG:
9908 xmlGenericError(xmlGenericErrorContext,
9909 "PP: try PROLOG\n");break;
9910 case XML_PARSER_START_TAG:
9911 xmlGenericError(xmlGenericErrorContext,
9912 "PP: try START_TAG\n");break;
9913 case XML_PARSER_CONTENT:
9914 xmlGenericError(xmlGenericErrorContext,
9915 "PP: try CONTENT\n");break;
9916 case XML_PARSER_CDATA_SECTION:
9917 xmlGenericError(xmlGenericErrorContext,
9918 "PP: try CDATA_SECTION\n");break;
9919 case XML_PARSER_END_TAG:
9920 xmlGenericError(xmlGenericErrorContext,
9921 "PP: try END_TAG\n");break;
9922 case XML_PARSER_ENTITY_DECL:
9923 xmlGenericError(xmlGenericErrorContext,
9924 "PP: try ENTITY_DECL\n");break;
9925 case XML_PARSER_ENTITY_VALUE:
9926 xmlGenericError(xmlGenericErrorContext,
9927 "PP: try ENTITY_VALUE\n");break;
9928 case XML_PARSER_ATTRIBUTE_VALUE:
9929 xmlGenericError(xmlGenericErrorContext,
9930 "PP: try ATTRIBUTE_VALUE\n");break;
9931 case XML_PARSER_DTD:
9932 xmlGenericError(xmlGenericErrorContext,
9933 "PP: try DTD\n");break;
9934 case XML_PARSER_EPILOG:
9935 xmlGenericError(xmlGenericErrorContext,
9936 "PP: try EPILOG\n");break;
9937 case XML_PARSER_PI:
9938 xmlGenericError(xmlGenericErrorContext,
9939 "PP: try PI\n");break;
9940 case XML_PARSER_IGNORE:
9941 xmlGenericError(xmlGenericErrorContext,
9942 "PP: try IGNORE\n");break;
9943 }
9944#endif
9945
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009946 if ((ctxt->input != NULL) &&
9947 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009948 xmlSHRINK(ctxt);
9949 ctxt->checkIndex = 0;
9950 }
9951 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009952
Daniel Veillarda880b122003-04-21 21:36:41 +00009953 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009954 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009955 return(0);
9956
9957
Owen Taylor3473f882001-02-23 17:55:21 +00009958 /*
9959 * Pop-up of finished entities.
9960 */
9961 while ((RAW == 0) && (ctxt->inputNr > 1))
9962 xmlPopInput(ctxt);
9963
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009964 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009965 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009966 avail = ctxt->input->length -
9967 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009968 else {
9969 /*
9970 * If we are operating on converted input, try to flush
9971 * remaining chars to avoid them stalling in the non-converted
9972 * buffer.
9973 */
9974 if ((ctxt->input->buf->raw != NULL) &&
9975 (ctxt->input->buf->raw->use > 0)) {
9976 int base = ctxt->input->base -
9977 ctxt->input->buf->buffer->content;
9978 int current = ctxt->input->cur - ctxt->input->base;
9979
9980 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9981 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9982 ctxt->input->cur = ctxt->input->base + current;
9983 ctxt->input->end =
9984 &ctxt->input->buf->buffer->content[
9985 ctxt->input->buf->buffer->use];
9986 }
9987 avail = ctxt->input->buf->buffer->use -
9988 (ctxt->input->cur - ctxt->input->base);
9989 }
Owen Taylor3473f882001-02-23 17:55:21 +00009990 if (avail < 1)
9991 goto done;
9992 switch (ctxt->instate) {
9993 case XML_PARSER_EOF:
9994 /*
9995 * Document parsing is done !
9996 */
9997 goto done;
9998 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009999 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10000 xmlChar start[4];
10001 xmlCharEncoding enc;
10002
10003 /*
10004 * Very first chars read from the document flow.
10005 */
10006 if (avail < 4)
10007 goto done;
10008
10009 /*
10010 * Get the 4 first bytes and decode the charset
10011 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010012 * plug some encoding conversion routines,
10013 * else xmlSwitchEncoding will set to (default)
10014 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010015 */
10016 start[0] = RAW;
10017 start[1] = NXT(1);
10018 start[2] = NXT(2);
10019 start[3] = NXT(3);
10020 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010021 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010022 break;
10023 }
Owen Taylor3473f882001-02-23 17:55:21 +000010024
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010025 if (avail < 2)
10026 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010027 cur = ctxt->input->cur[0];
10028 next = ctxt->input->cur[1];
10029 if (cur == 0) {
10030 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10031 ctxt->sax->setDocumentLocator(ctxt->userData,
10032 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010033 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010034 ctxt->instate = XML_PARSER_EOF;
10035#ifdef DEBUG_PUSH
10036 xmlGenericError(xmlGenericErrorContext,
10037 "PP: entering EOF\n");
10038#endif
10039 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10040 ctxt->sax->endDocument(ctxt->userData);
10041 goto done;
10042 }
10043 if ((cur == '<') && (next == '?')) {
10044 /* PI or XML decl */
10045 if (avail < 5) return(ret);
10046 if ((!terminate) &&
10047 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10048 return(ret);
10049 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10050 ctxt->sax->setDocumentLocator(ctxt->userData,
10051 &xmlDefaultSAXLocator);
10052 if ((ctxt->input->cur[2] == 'x') &&
10053 (ctxt->input->cur[3] == 'm') &&
10054 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010055 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010056 ret += 5;
10057#ifdef DEBUG_PUSH
10058 xmlGenericError(xmlGenericErrorContext,
10059 "PP: Parsing XML Decl\n");
10060#endif
10061 xmlParseXMLDecl(ctxt);
10062 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10063 /*
10064 * The XML REC instructs us to stop parsing right
10065 * here
10066 */
10067 ctxt->instate = XML_PARSER_EOF;
10068 return(0);
10069 }
10070 ctxt->standalone = ctxt->input->standalone;
10071 if ((ctxt->encoding == NULL) &&
10072 (ctxt->input->encoding != NULL))
10073 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10074 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10075 (!ctxt->disableSAX))
10076 ctxt->sax->startDocument(ctxt->userData);
10077 ctxt->instate = XML_PARSER_MISC;
10078#ifdef DEBUG_PUSH
10079 xmlGenericError(xmlGenericErrorContext,
10080 "PP: entering MISC\n");
10081#endif
10082 } else {
10083 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10084 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10085 (!ctxt->disableSAX))
10086 ctxt->sax->startDocument(ctxt->userData);
10087 ctxt->instate = XML_PARSER_MISC;
10088#ifdef DEBUG_PUSH
10089 xmlGenericError(xmlGenericErrorContext,
10090 "PP: entering MISC\n");
10091#endif
10092 }
10093 } else {
10094 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10095 ctxt->sax->setDocumentLocator(ctxt->userData,
10096 &xmlDefaultSAXLocator);
10097 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010098 if (ctxt->version == NULL) {
10099 xmlErrMemory(ctxt, NULL);
10100 break;
10101 }
Owen Taylor3473f882001-02-23 17:55:21 +000010102 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10103 (!ctxt->disableSAX))
10104 ctxt->sax->startDocument(ctxt->userData);
10105 ctxt->instate = XML_PARSER_MISC;
10106#ifdef DEBUG_PUSH
10107 xmlGenericError(xmlGenericErrorContext,
10108 "PP: entering MISC\n");
10109#endif
10110 }
10111 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010112 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010113 const xmlChar *name;
10114 const xmlChar *prefix;
10115 const xmlChar *URI;
10116 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010117
10118 if ((avail < 2) && (ctxt->inputNr == 1))
10119 goto done;
10120 cur = ctxt->input->cur[0];
10121 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010122 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010123 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010124 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10125 ctxt->sax->endDocument(ctxt->userData);
10126 goto done;
10127 }
10128 if (!terminate) {
10129 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010130 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010131 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010132 goto done;
10133 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10134 goto done;
10135 }
10136 }
10137 if (ctxt->spaceNr == 0)
10138 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010139 else if (*ctxt->space == -2)
10140 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010141 else
10142 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010143#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010144 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010145#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010146 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010147#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010148 else
10149 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010150#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010151 if (name == NULL) {
10152 spacePop(ctxt);
10153 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010154 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10155 ctxt->sax->endDocument(ctxt->userData);
10156 goto done;
10157 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010158#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010159 /*
10160 * [ VC: Root Element Type ]
10161 * The Name in the document type declaration must match
10162 * the element type of the root element.
10163 */
10164 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10165 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10166 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010167#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010168
10169 /*
10170 * Check for an Empty Element.
10171 */
10172 if ((RAW == '/') && (NXT(1) == '>')) {
10173 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010174
10175 if (ctxt->sax2) {
10176 if ((ctxt->sax != NULL) &&
10177 (ctxt->sax->endElementNs != NULL) &&
10178 (!ctxt->disableSAX))
10179 ctxt->sax->endElementNs(ctxt->userData, name,
10180 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010181 if (ctxt->nsNr - nsNr > 0)
10182 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010183#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010184 } else {
10185 if ((ctxt->sax != NULL) &&
10186 (ctxt->sax->endElement != NULL) &&
10187 (!ctxt->disableSAX))
10188 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010189#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010190 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010191 spacePop(ctxt);
10192 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010193 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010194 } else {
10195 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010196 }
10197 break;
10198 }
10199 if (RAW == '>') {
10200 NEXT;
10201 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010202 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010203 "Couldn't find end of Start Tag %s\n",
10204 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010205 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010206 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010207 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010208 if (ctxt->sax2)
10209 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010210#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010211 else
10212 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010213#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010214
Daniel Veillarda880b122003-04-21 21:36:41 +000010215 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010216 break;
10217 }
10218 case XML_PARSER_CONTENT: {
10219 const xmlChar *test;
10220 unsigned int cons;
10221 if ((avail < 2) && (ctxt->inputNr == 1))
10222 goto done;
10223 cur = ctxt->input->cur[0];
10224 next = ctxt->input->cur[1];
10225
10226 test = CUR_PTR;
10227 cons = ctxt->input->consumed;
10228 if ((cur == '<') && (next == '/')) {
10229 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010230 break;
10231 } else if ((cur == '<') && (next == '?')) {
10232 if ((!terminate) &&
10233 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10234 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010235 xmlParsePI(ctxt);
10236 } else if ((cur == '<') && (next != '!')) {
10237 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010238 break;
10239 } else if ((cur == '<') && (next == '!') &&
10240 (ctxt->input->cur[2] == '-') &&
10241 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010242 int term;
10243
10244 if (avail < 4)
10245 goto done;
10246 ctxt->input->cur += 4;
10247 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10248 ctxt->input->cur -= 4;
10249 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010250 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010251 xmlParseComment(ctxt);
10252 ctxt->instate = XML_PARSER_CONTENT;
10253 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10254 (ctxt->input->cur[2] == '[') &&
10255 (ctxt->input->cur[3] == 'C') &&
10256 (ctxt->input->cur[4] == 'D') &&
10257 (ctxt->input->cur[5] == 'A') &&
10258 (ctxt->input->cur[6] == 'T') &&
10259 (ctxt->input->cur[7] == 'A') &&
10260 (ctxt->input->cur[8] == '[')) {
10261 SKIP(9);
10262 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010263 break;
10264 } else if ((cur == '<') && (next == '!') &&
10265 (avail < 9)) {
10266 goto done;
10267 } else if (cur == '&') {
10268 if ((!terminate) &&
10269 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10270 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010271 xmlParseReference(ctxt);
10272 } else {
10273 /* TODO Avoid the extra copy, handle directly !!! */
10274 /*
10275 * Goal of the following test is:
10276 * - minimize calls to the SAX 'character' callback
10277 * when they are mergeable
10278 * - handle an problem for isBlank when we only parse
10279 * a sequence of blank chars and the next one is
10280 * not available to check against '<' presence.
10281 * - tries to homogenize the differences in SAX
10282 * callbacks between the push and pull versions
10283 * of the parser.
10284 */
10285 if ((ctxt->inputNr == 1) &&
10286 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10287 if (!terminate) {
10288 if (ctxt->progressive) {
10289 if ((lastlt == NULL) ||
10290 (ctxt->input->cur > lastlt))
10291 goto done;
10292 } else if (xmlParseLookupSequence(ctxt,
10293 '<', 0, 0) < 0) {
10294 goto done;
10295 }
10296 }
10297 }
10298 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010299 xmlParseCharData(ctxt, 0);
10300 }
10301 /*
10302 * Pop-up of finished entities.
10303 */
10304 while ((RAW == 0) && (ctxt->inputNr > 1))
10305 xmlPopInput(ctxt);
10306 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010307 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10308 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010309 ctxt->instate = XML_PARSER_EOF;
10310 break;
10311 }
10312 break;
10313 }
10314 case XML_PARSER_END_TAG:
10315 if (avail < 2)
10316 goto done;
10317 if (!terminate) {
10318 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010319 /* > can be found unescaped in attribute values */
10320 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010321 goto done;
10322 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10323 goto done;
10324 }
10325 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010326 if (ctxt->sax2) {
10327 xmlParseEndTag2(ctxt,
10328 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10329 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010330 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010331 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010332 }
10333#ifdef LIBXML_SAX1_ENABLED
10334 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010335 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010336#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010337 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010338 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010339 } else {
10340 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010341 }
10342 break;
10343 case XML_PARSER_CDATA_SECTION: {
10344 /*
10345 * The Push mode need to have the SAX callback for
10346 * cdataBlock merge back contiguous callbacks.
10347 */
10348 int base;
10349
10350 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10351 if (base < 0) {
10352 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010353 int tmp;
10354
10355 tmp = xmlCheckCdataPush(ctxt->input->cur,
10356 XML_PARSER_BIG_BUFFER_SIZE);
10357 if (tmp < 0) {
10358 tmp = -tmp;
10359 ctxt->input->cur += tmp;
10360 goto encoding_error;
10361 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010362 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10363 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010364 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010365 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010366 else if (ctxt->sax->characters != NULL)
10367 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010368 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010369 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010370 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010371 ctxt->checkIndex = 0;
10372 }
10373 goto done;
10374 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010375 int tmp;
10376
10377 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10378 if ((tmp < 0) || (tmp != base)) {
10379 tmp = -tmp;
10380 ctxt->input->cur += tmp;
10381 goto encoding_error;
10382 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010383 if ((ctxt->sax != NULL) && (base == 0) &&
10384 (ctxt->sax->cdataBlock != NULL) &&
10385 (!ctxt->disableSAX)) {
10386 /*
10387 * Special case to provide identical behaviour
10388 * between pull and push parsers on enpty CDATA
10389 * sections
10390 */
10391 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10392 (!strncmp((const char *)&ctxt->input->cur[-9],
10393 "<![CDATA[", 9)))
10394 ctxt->sax->cdataBlock(ctxt->userData,
10395 BAD_CAST "", 0);
10396 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010397 (!ctxt->disableSAX)) {
10398 if (ctxt->sax->cdataBlock != NULL)
10399 ctxt->sax->cdataBlock(ctxt->userData,
10400 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010401 else if (ctxt->sax->characters != NULL)
10402 ctxt->sax->characters(ctxt->userData,
10403 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010404 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010405 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010406 ctxt->checkIndex = 0;
10407 ctxt->instate = XML_PARSER_CONTENT;
10408#ifdef DEBUG_PUSH
10409 xmlGenericError(xmlGenericErrorContext,
10410 "PP: entering CONTENT\n");
10411#endif
10412 }
10413 break;
10414 }
Owen Taylor3473f882001-02-23 17:55:21 +000010415 case XML_PARSER_MISC:
10416 SKIP_BLANKS;
10417 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010418 avail = ctxt->input->length -
10419 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010420 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010421 avail = ctxt->input->buf->buffer->use -
10422 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010423 if (avail < 2)
10424 goto done;
10425 cur = ctxt->input->cur[0];
10426 next = ctxt->input->cur[1];
10427 if ((cur == '<') && (next == '?')) {
10428 if ((!terminate) &&
10429 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10430 goto done;
10431#ifdef DEBUG_PUSH
10432 xmlGenericError(xmlGenericErrorContext,
10433 "PP: Parsing PI\n");
10434#endif
10435 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010436 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010437 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010438 (ctxt->input->cur[2] == '-') &&
10439 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010440 if ((!terminate) &&
10441 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10442 goto done;
10443#ifdef DEBUG_PUSH
10444 xmlGenericError(xmlGenericErrorContext,
10445 "PP: Parsing Comment\n");
10446#endif
10447 xmlParseComment(ctxt);
10448 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010449 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010450 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010451 (ctxt->input->cur[2] == 'D') &&
10452 (ctxt->input->cur[3] == 'O') &&
10453 (ctxt->input->cur[4] == 'C') &&
10454 (ctxt->input->cur[5] == 'T') &&
10455 (ctxt->input->cur[6] == 'Y') &&
10456 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010457 (ctxt->input->cur[8] == 'E')) {
10458 if ((!terminate) &&
10459 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10460 goto done;
10461#ifdef DEBUG_PUSH
10462 xmlGenericError(xmlGenericErrorContext,
10463 "PP: Parsing internal subset\n");
10464#endif
10465 ctxt->inSubset = 1;
10466 xmlParseDocTypeDecl(ctxt);
10467 if (RAW == '[') {
10468 ctxt->instate = XML_PARSER_DTD;
10469#ifdef DEBUG_PUSH
10470 xmlGenericError(xmlGenericErrorContext,
10471 "PP: entering DTD\n");
10472#endif
10473 } else {
10474 /*
10475 * Create and update the external subset.
10476 */
10477 ctxt->inSubset = 2;
10478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10479 (ctxt->sax->externalSubset != NULL))
10480 ctxt->sax->externalSubset(ctxt->userData,
10481 ctxt->intSubName, ctxt->extSubSystem,
10482 ctxt->extSubURI);
10483 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010484 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010485 ctxt->instate = XML_PARSER_PROLOG;
10486#ifdef DEBUG_PUSH
10487 xmlGenericError(xmlGenericErrorContext,
10488 "PP: entering PROLOG\n");
10489#endif
10490 }
10491 } else if ((cur == '<') && (next == '!') &&
10492 (avail < 9)) {
10493 goto done;
10494 } else {
10495 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010496 ctxt->progressive = 1;
10497 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010498#ifdef DEBUG_PUSH
10499 xmlGenericError(xmlGenericErrorContext,
10500 "PP: entering START_TAG\n");
10501#endif
10502 }
10503 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010504 case XML_PARSER_PROLOG:
10505 SKIP_BLANKS;
10506 if (ctxt->input->buf == NULL)
10507 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10508 else
10509 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10510 if (avail < 2)
10511 goto done;
10512 cur = ctxt->input->cur[0];
10513 next = ctxt->input->cur[1];
10514 if ((cur == '<') && (next == '?')) {
10515 if ((!terminate) &&
10516 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10517 goto done;
10518#ifdef DEBUG_PUSH
10519 xmlGenericError(xmlGenericErrorContext,
10520 "PP: Parsing PI\n");
10521#endif
10522 xmlParsePI(ctxt);
10523 } else if ((cur == '<') && (next == '!') &&
10524 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10525 if ((!terminate) &&
10526 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10527 goto done;
10528#ifdef DEBUG_PUSH
10529 xmlGenericError(xmlGenericErrorContext,
10530 "PP: Parsing Comment\n");
10531#endif
10532 xmlParseComment(ctxt);
10533 ctxt->instate = XML_PARSER_PROLOG;
10534 } else if ((cur == '<') && (next == '!') &&
10535 (avail < 4)) {
10536 goto done;
10537 } else {
10538 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010539 if (ctxt->progressive == 0)
10540 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010541 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010542#ifdef DEBUG_PUSH
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: entering START_TAG\n");
10545#endif
10546 }
10547 break;
10548 case XML_PARSER_EPILOG:
10549 SKIP_BLANKS;
10550 if (ctxt->input->buf == NULL)
10551 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10552 else
10553 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10554 if (avail < 2)
10555 goto done;
10556 cur = ctxt->input->cur[0];
10557 next = ctxt->input->cur[1];
10558 if ((cur == '<') && (next == '?')) {
10559 if ((!terminate) &&
10560 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10561 goto done;
10562#ifdef DEBUG_PUSH
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: Parsing PI\n");
10565#endif
10566 xmlParsePI(ctxt);
10567 ctxt->instate = XML_PARSER_EPILOG;
10568 } else if ((cur == '<') && (next == '!') &&
10569 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10570 if ((!terminate) &&
10571 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10572 goto done;
10573#ifdef DEBUG_PUSH
10574 xmlGenericError(xmlGenericErrorContext,
10575 "PP: Parsing Comment\n");
10576#endif
10577 xmlParseComment(ctxt);
10578 ctxt->instate = XML_PARSER_EPILOG;
10579 } else if ((cur == '<') && (next == '!') &&
10580 (avail < 4)) {
10581 goto done;
10582 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010583 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010584 ctxt->instate = XML_PARSER_EOF;
10585#ifdef DEBUG_PUSH
10586 xmlGenericError(xmlGenericErrorContext,
10587 "PP: entering EOF\n");
10588#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010589 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010590 ctxt->sax->endDocument(ctxt->userData);
10591 goto done;
10592 }
10593 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010594 case XML_PARSER_DTD: {
10595 /*
10596 * Sorry but progressive parsing of the internal subset
10597 * is not expected to be supported. We first check that
10598 * the full content of the internal subset is available and
10599 * the parsing is launched only at that point.
10600 * Internal subset ends up with "']' S? '>'" in an unescaped
10601 * section and not in a ']]>' sequence which are conditional
10602 * sections (whoever argued to keep that crap in XML deserve
10603 * a place in hell !).
10604 */
10605 int base, i;
10606 xmlChar *buf;
10607 xmlChar quote = 0;
10608
10609 base = ctxt->input->cur - ctxt->input->base;
10610 if (base < 0) return(0);
10611 if (ctxt->checkIndex > base)
10612 base = ctxt->checkIndex;
10613 buf = ctxt->input->buf->buffer->content;
10614 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10615 base++) {
10616 if (quote != 0) {
10617 if (buf[base] == quote)
10618 quote = 0;
10619 continue;
10620 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010621 if ((quote == 0) && (buf[base] == '<')) {
10622 int found = 0;
10623 /* special handling of comments */
10624 if (((unsigned int) base + 4 <
10625 ctxt->input->buf->buffer->use) &&
10626 (buf[base + 1] == '!') &&
10627 (buf[base + 2] == '-') &&
10628 (buf[base + 3] == '-')) {
10629 for (;(unsigned int) base + 3 <
10630 ctxt->input->buf->buffer->use; base++) {
10631 if ((buf[base] == '-') &&
10632 (buf[base + 1] == '-') &&
10633 (buf[base + 2] == '>')) {
10634 found = 1;
10635 base += 2;
10636 break;
10637 }
10638 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010639 if (!found) {
10640#if 0
10641 fprintf(stderr, "unfinished comment\n");
10642#endif
10643 break; /* for */
10644 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010645 continue;
10646 }
10647 }
Owen Taylor3473f882001-02-23 17:55:21 +000010648 if (buf[base] == '"') {
10649 quote = '"';
10650 continue;
10651 }
10652 if (buf[base] == '\'') {
10653 quote = '\'';
10654 continue;
10655 }
10656 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010657#if 0
10658 fprintf(stderr, "%c%c%c%c: ", buf[base],
10659 buf[base + 1], buf[base + 2], buf[base + 3]);
10660#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010661 if ((unsigned int) base +1 >=
10662 ctxt->input->buf->buffer->use)
10663 break;
10664 if (buf[base + 1] == ']') {
10665 /* conditional crap, skip both ']' ! */
10666 base++;
10667 continue;
10668 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010669 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010670 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10671 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010672 if (buf[base + i] == '>') {
10673#if 0
10674 fprintf(stderr, "found\n");
10675#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010676 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010677 }
10678 if (!IS_BLANK_CH(buf[base + i])) {
10679#if 0
10680 fprintf(stderr, "not found\n");
10681#endif
10682 goto not_end_of_int_subset;
10683 }
Owen Taylor3473f882001-02-23 17:55:21 +000010684 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010685#if 0
10686 fprintf(stderr, "end of stream\n");
10687#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010688 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010689
Owen Taylor3473f882001-02-23 17:55:21 +000010690 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010691not_end_of_int_subset:
10692 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010693 }
10694 /*
10695 * We didn't find the end of the Internal subset
10696 */
Owen Taylor3473f882001-02-23 17:55:21 +000010697#ifdef DEBUG_PUSH
10698 if (next == 0)
10699 xmlGenericError(xmlGenericErrorContext,
10700 "PP: lookup of int subset end filed\n");
10701#endif
10702 goto done;
10703
10704found_end_int_subset:
10705 xmlParseInternalSubset(ctxt);
10706 ctxt->inSubset = 2;
10707 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10708 (ctxt->sax->externalSubset != NULL))
10709 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10710 ctxt->extSubSystem, ctxt->extSubURI);
10711 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010712 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010713 ctxt->instate = XML_PARSER_PROLOG;
10714 ctxt->checkIndex = 0;
10715#ifdef DEBUG_PUSH
10716 xmlGenericError(xmlGenericErrorContext,
10717 "PP: entering PROLOG\n");
10718#endif
10719 break;
10720 }
10721 case XML_PARSER_COMMENT:
10722 xmlGenericError(xmlGenericErrorContext,
10723 "PP: internal error, state == COMMENT\n");
10724 ctxt->instate = XML_PARSER_CONTENT;
10725#ifdef DEBUG_PUSH
10726 xmlGenericError(xmlGenericErrorContext,
10727 "PP: entering CONTENT\n");
10728#endif
10729 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010730 case XML_PARSER_IGNORE:
10731 xmlGenericError(xmlGenericErrorContext,
10732 "PP: internal error, state == IGNORE");
10733 ctxt->instate = XML_PARSER_DTD;
10734#ifdef DEBUG_PUSH
10735 xmlGenericError(xmlGenericErrorContext,
10736 "PP: entering DTD\n");
10737#endif
10738 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010739 case XML_PARSER_PI:
10740 xmlGenericError(xmlGenericErrorContext,
10741 "PP: internal error, state == PI\n");
10742 ctxt->instate = XML_PARSER_CONTENT;
10743#ifdef DEBUG_PUSH
10744 xmlGenericError(xmlGenericErrorContext,
10745 "PP: entering CONTENT\n");
10746#endif
10747 break;
10748 case XML_PARSER_ENTITY_DECL:
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: internal error, state == ENTITY_DECL\n");
10751 ctxt->instate = XML_PARSER_DTD;
10752#ifdef DEBUG_PUSH
10753 xmlGenericError(xmlGenericErrorContext,
10754 "PP: entering DTD\n");
10755#endif
10756 break;
10757 case XML_PARSER_ENTITY_VALUE:
10758 xmlGenericError(xmlGenericErrorContext,
10759 "PP: internal error, state == ENTITY_VALUE\n");
10760 ctxt->instate = XML_PARSER_CONTENT;
10761#ifdef DEBUG_PUSH
10762 xmlGenericError(xmlGenericErrorContext,
10763 "PP: entering DTD\n");
10764#endif
10765 break;
10766 case XML_PARSER_ATTRIBUTE_VALUE:
10767 xmlGenericError(xmlGenericErrorContext,
10768 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10769 ctxt->instate = XML_PARSER_START_TAG;
10770#ifdef DEBUG_PUSH
10771 xmlGenericError(xmlGenericErrorContext,
10772 "PP: entering START_TAG\n");
10773#endif
10774 break;
10775 case XML_PARSER_SYSTEM_LITERAL:
10776 xmlGenericError(xmlGenericErrorContext,
10777 "PP: internal error, state == SYSTEM_LITERAL\n");
10778 ctxt->instate = XML_PARSER_START_TAG;
10779#ifdef DEBUG_PUSH
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: entering START_TAG\n");
10782#endif
10783 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010784 case XML_PARSER_PUBLIC_LITERAL:
10785 xmlGenericError(xmlGenericErrorContext,
10786 "PP: internal error, state == PUBLIC_LITERAL\n");
10787 ctxt->instate = XML_PARSER_START_TAG;
10788#ifdef DEBUG_PUSH
10789 xmlGenericError(xmlGenericErrorContext,
10790 "PP: entering START_TAG\n");
10791#endif
10792 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010793 }
10794 }
10795done:
10796#ifdef DEBUG_PUSH
10797 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10798#endif
10799 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010800encoding_error:
10801 {
10802 char buffer[150];
10803
10804 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10805 ctxt->input->cur[0], ctxt->input->cur[1],
10806 ctxt->input->cur[2], ctxt->input->cur[3]);
10807 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10808 "Input is not proper UTF-8, indicate encoding !\n%s",
10809 BAD_CAST buffer, NULL);
10810 }
10811 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010812}
10813
10814/**
Owen Taylor3473f882001-02-23 17:55:21 +000010815 * xmlParseChunk:
10816 * @ctxt: an XML parser context
10817 * @chunk: an char array
10818 * @size: the size in byte of the chunk
10819 * @terminate: last chunk indicator
10820 *
10821 * Parse a Chunk of memory
10822 *
10823 * Returns zero if no error, the xmlParserErrors otherwise.
10824 */
10825int
10826xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10827 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010828 int end_in_lf = 0;
10829
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010830 if (ctxt == NULL)
10831 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010832 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010833 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010834 if (ctxt->instate == XML_PARSER_START)
10835 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010836 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10837 (chunk[size - 1] == '\r')) {
10838 end_in_lf = 1;
10839 size--;
10840 }
Owen Taylor3473f882001-02-23 17:55:21 +000010841 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10842 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10843 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10844 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010845 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010846
William M. Bracka3215c72004-07-31 16:24:01 +000010847 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10848 if (res < 0) {
10849 ctxt->errNo = XML_PARSER_EOF;
10850 ctxt->disableSAX = 1;
10851 return (XML_PARSER_EOF);
10852 }
Owen Taylor3473f882001-02-23 17:55:21 +000010853 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10854 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010855 ctxt->input->end =
10856 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010857#ifdef DEBUG_PUSH
10858 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10859#endif
10860
Owen Taylor3473f882001-02-23 17:55:21 +000010861 } else if (ctxt->instate != XML_PARSER_EOF) {
10862 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10863 xmlParserInputBufferPtr in = ctxt->input->buf;
10864 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10865 (in->raw != NULL)) {
10866 int nbchars;
10867
10868 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10869 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010870 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010871 xmlGenericError(xmlGenericErrorContext,
10872 "xmlParseChunk: encoder error\n");
10873 return(XML_ERR_INVALID_ENCODING);
10874 }
10875 }
10876 }
10877 }
10878 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010879 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10880 (ctxt->input->buf != NULL)) {
10881 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10882 }
Daniel Veillard14412512005-01-21 23:53:26 +000010883 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010884 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010885 if (terminate) {
10886 /*
10887 * Check for termination
10888 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010889 int avail = 0;
10890
10891 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010892 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010893 avail = ctxt->input->length -
10894 (ctxt->input->cur - ctxt->input->base);
10895 else
10896 avail = ctxt->input->buf->buffer->use -
10897 (ctxt->input->cur - ctxt->input->base);
10898 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010899
Owen Taylor3473f882001-02-23 17:55:21 +000010900 if ((ctxt->instate != XML_PARSER_EOF) &&
10901 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010902 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010903 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010904 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010905 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010906 }
Owen Taylor3473f882001-02-23 17:55:21 +000010907 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010908 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010909 ctxt->sax->endDocument(ctxt->userData);
10910 }
10911 ctxt->instate = XML_PARSER_EOF;
10912 }
10913 return((xmlParserErrors) ctxt->errNo);
10914}
10915
10916/************************************************************************
10917 * *
10918 * I/O front end functions to the parser *
10919 * *
10920 ************************************************************************/
10921
10922/**
Owen Taylor3473f882001-02-23 17:55:21 +000010923 * xmlCreatePushParserCtxt:
10924 * @sax: a SAX handler
10925 * @user_data: The user data returned on SAX callbacks
10926 * @chunk: a pointer to an array of chars
10927 * @size: number of chars in the array
10928 * @filename: an optional file name or URI
10929 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010930 * Create a parser context for using the XML parser in push mode.
10931 * If @buffer and @size are non-NULL, the data is used to detect
10932 * the encoding. The remaining characters will be parsed so they
10933 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010934 * To allow content encoding detection, @size should be >= 4
10935 * The value of @filename is used for fetching external entities
10936 * and error/warning reports.
10937 *
10938 * Returns the new parser context or NULL
10939 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010940
Owen Taylor3473f882001-02-23 17:55:21 +000010941xmlParserCtxtPtr
10942xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10943 const char *chunk, int size, const char *filename) {
10944 xmlParserCtxtPtr ctxt;
10945 xmlParserInputPtr inputStream;
10946 xmlParserInputBufferPtr buf;
10947 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10948
10949 /*
10950 * plug some encoding conversion routines
10951 */
10952 if ((chunk != NULL) && (size >= 4))
10953 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10954
10955 buf = xmlAllocParserInputBuffer(enc);
10956 if (buf == NULL) return(NULL);
10957
10958 ctxt = xmlNewParserCtxt();
10959 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010960 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010961 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010962 return(NULL);
10963 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010964 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010965 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10966 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010967 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010968 xmlFreeParserInputBuffer(buf);
10969 xmlFreeParserCtxt(ctxt);
10970 return(NULL);
10971 }
Owen Taylor3473f882001-02-23 17:55:21 +000010972 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010973#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010974 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010975#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010976 xmlFree(ctxt->sax);
10977 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10978 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010979 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010980 xmlFreeParserInputBuffer(buf);
10981 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010982 return(NULL);
10983 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010984 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10985 if (sax->initialized == XML_SAX2_MAGIC)
10986 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10987 else
10988 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010989 if (user_data != NULL)
10990 ctxt->userData = user_data;
10991 }
10992 if (filename == NULL) {
10993 ctxt->directory = NULL;
10994 } else {
10995 ctxt->directory = xmlParserGetDirectory(filename);
10996 }
10997
10998 inputStream = xmlNewInputStream(ctxt);
10999 if (inputStream == NULL) {
11000 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011001 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011002 return(NULL);
11003 }
11004
11005 if (filename == NULL)
11006 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011007 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011008 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011009 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011010 if (inputStream->filename == NULL) {
11011 xmlFreeParserCtxt(ctxt);
11012 xmlFreeParserInputBuffer(buf);
11013 return(NULL);
11014 }
11015 }
Owen Taylor3473f882001-02-23 17:55:21 +000011016 inputStream->buf = buf;
11017 inputStream->base = inputStream->buf->buffer->content;
11018 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011019 inputStream->end =
11020 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011021
11022 inputPush(ctxt, inputStream);
11023
William M. Brack3a1cd212005-02-11 14:35:54 +000011024 /*
11025 * If the caller didn't provide an initial 'chunk' for determining
11026 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11027 * that it can be automatically determined later
11028 */
11029 if ((size == 0) || (chunk == NULL)) {
11030 ctxt->charset = XML_CHAR_ENCODING_NONE;
11031 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011032 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11033 int cur = ctxt->input->cur - ctxt->input->base;
11034
Owen Taylor3473f882001-02-23 17:55:21 +000011035 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011036
11037 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11038 ctxt->input->cur = ctxt->input->base + cur;
11039 ctxt->input->end =
11040 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011041#ifdef DEBUG_PUSH
11042 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11043#endif
11044 }
11045
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011046 if (enc != XML_CHAR_ENCODING_NONE) {
11047 xmlSwitchEncoding(ctxt, enc);
11048 }
11049
Owen Taylor3473f882001-02-23 17:55:21 +000011050 return(ctxt);
11051}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011052#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011053
11054/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011055 * xmlStopParser:
11056 * @ctxt: an XML parser context
11057 *
11058 * Blocks further parser processing
11059 */
11060void
11061xmlStopParser(xmlParserCtxtPtr ctxt) {
11062 if (ctxt == NULL)
11063 return;
11064 ctxt->instate = XML_PARSER_EOF;
11065 ctxt->disableSAX = 1;
11066 if (ctxt->input != NULL) {
11067 ctxt->input->cur = BAD_CAST"";
11068 ctxt->input->base = ctxt->input->cur;
11069 }
11070}
11071
11072/**
Owen Taylor3473f882001-02-23 17:55:21 +000011073 * xmlCreateIOParserCtxt:
11074 * @sax: a SAX handler
11075 * @user_data: The user data returned on SAX callbacks
11076 * @ioread: an I/O read function
11077 * @ioclose: an I/O close function
11078 * @ioctx: an I/O handler
11079 * @enc: the charset encoding if known
11080 *
11081 * Create a parser context for using the XML parser with an existing
11082 * I/O stream
11083 *
11084 * Returns the new parser context or NULL
11085 */
11086xmlParserCtxtPtr
11087xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11088 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11089 void *ioctx, xmlCharEncoding enc) {
11090 xmlParserCtxtPtr ctxt;
11091 xmlParserInputPtr inputStream;
11092 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011093
11094 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011095
11096 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11097 if (buf == NULL) return(NULL);
11098
11099 ctxt = xmlNewParserCtxt();
11100 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011101 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011102 return(NULL);
11103 }
11104 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011105#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011106 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011107#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011108 xmlFree(ctxt->sax);
11109 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11110 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011111 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011112 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011113 return(NULL);
11114 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011115 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11116 if (sax->initialized == XML_SAX2_MAGIC)
11117 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11118 else
11119 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011120 if (user_data != NULL)
11121 ctxt->userData = user_data;
11122 }
11123
11124 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11125 if (inputStream == NULL) {
11126 xmlFreeParserCtxt(ctxt);
11127 return(NULL);
11128 }
11129 inputPush(ctxt, inputStream);
11130
11131 return(ctxt);
11132}
11133
Daniel Veillard4432df22003-09-28 18:58:27 +000011134#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011135/************************************************************************
11136 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011137 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011138 * *
11139 ************************************************************************/
11140
11141/**
11142 * xmlIOParseDTD:
11143 * @sax: the SAX handler block or NULL
11144 * @input: an Input Buffer
11145 * @enc: the charset encoding if known
11146 *
11147 * Load and parse a DTD
11148 *
11149 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011150 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011151 */
11152
11153xmlDtdPtr
11154xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11155 xmlCharEncoding enc) {
11156 xmlDtdPtr ret = NULL;
11157 xmlParserCtxtPtr ctxt;
11158 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011159 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011160
11161 if (input == NULL)
11162 return(NULL);
11163
11164 ctxt = xmlNewParserCtxt();
11165 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011166 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011167 return(NULL);
11168 }
11169
11170 /*
11171 * Set-up the SAX context
11172 */
11173 if (sax != NULL) {
11174 if (ctxt->sax != NULL)
11175 xmlFree(ctxt->sax);
11176 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011177 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011178 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011179 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011180
11181 /*
11182 * generate a parser input from the I/O handler
11183 */
11184
Daniel Veillard43caefb2003-12-07 19:32:22 +000011185 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011186 if (pinput == NULL) {
11187 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011188 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011189 xmlFreeParserCtxt(ctxt);
11190 return(NULL);
11191 }
11192
11193 /*
11194 * plug some encoding conversion routines here.
11195 */
11196 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011197 if (enc != XML_CHAR_ENCODING_NONE) {
11198 xmlSwitchEncoding(ctxt, enc);
11199 }
Owen Taylor3473f882001-02-23 17:55:21 +000011200
11201 pinput->filename = NULL;
11202 pinput->line = 1;
11203 pinput->col = 1;
11204 pinput->base = ctxt->input->cur;
11205 pinput->cur = ctxt->input->cur;
11206 pinput->free = NULL;
11207
11208 /*
11209 * let's parse that entity knowing it's an external subset.
11210 */
11211 ctxt->inSubset = 2;
11212 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11213 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11214 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011215
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011216 if ((enc == XML_CHAR_ENCODING_NONE) &&
11217 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011218 /*
11219 * Get the 4 first bytes and decode the charset
11220 * if enc != XML_CHAR_ENCODING_NONE
11221 * plug some encoding conversion routines.
11222 */
11223 start[0] = RAW;
11224 start[1] = NXT(1);
11225 start[2] = NXT(2);
11226 start[3] = NXT(3);
11227 enc = xmlDetectCharEncoding(start, 4);
11228 if (enc != XML_CHAR_ENCODING_NONE) {
11229 xmlSwitchEncoding(ctxt, enc);
11230 }
11231 }
11232
Owen Taylor3473f882001-02-23 17:55:21 +000011233 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11234
11235 if (ctxt->myDoc != NULL) {
11236 if (ctxt->wellFormed) {
11237 ret = ctxt->myDoc->extSubset;
11238 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011239 if (ret != NULL) {
11240 xmlNodePtr tmp;
11241
11242 ret->doc = NULL;
11243 tmp = ret->children;
11244 while (tmp != NULL) {
11245 tmp->doc = NULL;
11246 tmp = tmp->next;
11247 }
11248 }
Owen Taylor3473f882001-02-23 17:55:21 +000011249 } else {
11250 ret = NULL;
11251 }
11252 xmlFreeDoc(ctxt->myDoc);
11253 ctxt->myDoc = NULL;
11254 }
11255 if (sax != NULL) ctxt->sax = NULL;
11256 xmlFreeParserCtxt(ctxt);
11257
11258 return(ret);
11259}
11260
11261/**
11262 * xmlSAXParseDTD:
11263 * @sax: the SAX handler block
11264 * @ExternalID: a NAME* containing the External ID of the DTD
11265 * @SystemID: a NAME* containing the URL to the DTD
11266 *
11267 * Load and parse an external subset.
11268 *
11269 * Returns the resulting xmlDtdPtr or NULL in case of error.
11270 */
11271
11272xmlDtdPtr
11273xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11274 const xmlChar *SystemID) {
11275 xmlDtdPtr ret = NULL;
11276 xmlParserCtxtPtr ctxt;
11277 xmlParserInputPtr input = NULL;
11278 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011279 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011280
11281 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11282
11283 ctxt = xmlNewParserCtxt();
11284 if (ctxt == NULL) {
11285 return(NULL);
11286 }
11287
11288 /*
11289 * Set-up the SAX context
11290 */
11291 if (sax != NULL) {
11292 if (ctxt->sax != NULL)
11293 xmlFree(ctxt->sax);
11294 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011295 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011296 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011297
11298 /*
11299 * Canonicalise the system ID
11300 */
11301 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011302 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011303 xmlFreeParserCtxt(ctxt);
11304 return(NULL);
11305 }
Owen Taylor3473f882001-02-23 17:55:21 +000011306
11307 /*
11308 * Ask the Entity resolver to load the damn thing
11309 */
11310
11311 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011312 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11313 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011314 if (input == NULL) {
11315 if (sax != NULL) ctxt->sax = NULL;
11316 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011317 if (systemIdCanonic != NULL)
11318 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011319 return(NULL);
11320 }
11321
11322 /*
11323 * plug some encoding conversion routines here.
11324 */
11325 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011326 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11327 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11328 xmlSwitchEncoding(ctxt, enc);
11329 }
Owen Taylor3473f882001-02-23 17:55:21 +000011330
11331 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011332 input->filename = (char *) systemIdCanonic;
11333 else
11334 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011335 input->line = 1;
11336 input->col = 1;
11337 input->base = ctxt->input->cur;
11338 input->cur = ctxt->input->cur;
11339 input->free = NULL;
11340
11341 /*
11342 * let's parse that entity knowing it's an external subset.
11343 */
11344 ctxt->inSubset = 2;
11345 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11346 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11347 ExternalID, SystemID);
11348 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11349
11350 if (ctxt->myDoc != NULL) {
11351 if (ctxt->wellFormed) {
11352 ret = ctxt->myDoc->extSubset;
11353 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011354 if (ret != NULL) {
11355 xmlNodePtr tmp;
11356
11357 ret->doc = NULL;
11358 tmp = ret->children;
11359 while (tmp != NULL) {
11360 tmp->doc = NULL;
11361 tmp = tmp->next;
11362 }
11363 }
Owen Taylor3473f882001-02-23 17:55:21 +000011364 } else {
11365 ret = NULL;
11366 }
11367 xmlFreeDoc(ctxt->myDoc);
11368 ctxt->myDoc = NULL;
11369 }
11370 if (sax != NULL) ctxt->sax = NULL;
11371 xmlFreeParserCtxt(ctxt);
11372
11373 return(ret);
11374}
11375
Daniel Veillard4432df22003-09-28 18:58:27 +000011376
Owen Taylor3473f882001-02-23 17:55:21 +000011377/**
11378 * xmlParseDTD:
11379 * @ExternalID: a NAME* containing the External ID of the DTD
11380 * @SystemID: a NAME* containing the URL to the DTD
11381 *
11382 * Load and parse an external subset.
11383 *
11384 * Returns the resulting xmlDtdPtr or NULL in case of error.
11385 */
11386
11387xmlDtdPtr
11388xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11389 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11390}
Daniel Veillard4432df22003-09-28 18:58:27 +000011391#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011392
11393/************************************************************************
11394 * *
11395 * Front ends when parsing an Entity *
11396 * *
11397 ************************************************************************/
11398
11399/**
Owen Taylor3473f882001-02-23 17:55:21 +000011400 * xmlParseCtxtExternalEntity:
11401 * @ctx: the existing parsing context
11402 * @URL: the URL for the entity to load
11403 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011404 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011405 *
11406 * Parse an external general entity within an existing parsing context
11407 * An external general parsed entity is well-formed if it matches the
11408 * production labeled extParsedEnt.
11409 *
11410 * [78] extParsedEnt ::= TextDecl? content
11411 *
11412 * Returns 0 if the entity is well formed, -1 in case of args problem and
11413 * the parser error code otherwise
11414 */
11415
11416int
11417xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011418 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011419 xmlParserCtxtPtr ctxt;
11420 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011421 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011422 xmlSAXHandlerPtr oldsax = NULL;
11423 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011424 xmlChar start[4];
11425 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011426 xmlParserInputPtr inputStream;
11427 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011428
Daniel Veillardce682bc2004-11-05 17:22:25 +000011429 if (ctx == NULL) return(-1);
11430
Owen Taylor3473f882001-02-23 17:55:21 +000011431 if (ctx->depth > 40) {
11432 return(XML_ERR_ENTITY_LOOP);
11433 }
11434
Daniel Veillardcda96922001-08-21 10:56:31 +000011435 if (lst != NULL)
11436 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011437 if ((URL == NULL) && (ID == NULL))
11438 return(-1);
11439 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11440 return(-1);
11441
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011442 ctxt = xmlNewParserCtxt();
11443 if (ctxt == NULL) {
11444 return(-1);
11445 }
11446
Owen Taylor3473f882001-02-23 17:55:21 +000011447 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011448 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011449
11450 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11451 if (inputStream == NULL) {
11452 xmlFreeParserCtxt(ctxt);
11453 return(-1);
11454 }
11455
11456 inputPush(ctxt, inputStream);
11457
11458 if ((ctxt->directory == NULL) && (directory == NULL))
11459 directory = xmlParserGetDirectory((char *)URL);
11460 if ((ctxt->directory == NULL) && (directory != NULL))
11461 ctxt->directory = directory;
11462
Owen Taylor3473f882001-02-23 17:55:21 +000011463 oldsax = ctxt->sax;
11464 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011465 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011466 newDoc = xmlNewDoc(BAD_CAST "1.0");
11467 if (newDoc == NULL) {
11468 xmlFreeParserCtxt(ctxt);
11469 return(-1);
11470 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011471 if (ctx->myDoc->dict) {
11472 newDoc->dict = ctx->myDoc->dict;
11473 xmlDictReference(newDoc->dict);
11474 }
Owen Taylor3473f882001-02-23 17:55:21 +000011475 if (ctx->myDoc != NULL) {
11476 newDoc->intSubset = ctx->myDoc->intSubset;
11477 newDoc->extSubset = ctx->myDoc->extSubset;
11478 }
11479 if (ctx->myDoc->URL != NULL) {
11480 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11481 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011482 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11483 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011484 ctxt->sax = oldsax;
11485 xmlFreeParserCtxt(ctxt);
11486 newDoc->intSubset = NULL;
11487 newDoc->extSubset = NULL;
11488 xmlFreeDoc(newDoc);
11489 return(-1);
11490 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011491 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011492 nodePush(ctxt, newDoc->children);
11493 if (ctx->myDoc == NULL) {
11494 ctxt->myDoc = newDoc;
11495 } else {
11496 ctxt->myDoc = ctx->myDoc;
11497 newDoc->children->doc = ctx->myDoc;
11498 }
11499
Daniel Veillard87a764e2001-06-20 17:41:10 +000011500 /*
11501 * Get the 4 first bytes and decode the charset
11502 * if enc != XML_CHAR_ENCODING_NONE
11503 * plug some encoding conversion routines.
11504 */
11505 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011506 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11507 start[0] = RAW;
11508 start[1] = NXT(1);
11509 start[2] = NXT(2);
11510 start[3] = NXT(3);
11511 enc = xmlDetectCharEncoding(start, 4);
11512 if (enc != XML_CHAR_ENCODING_NONE) {
11513 xmlSwitchEncoding(ctxt, enc);
11514 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011515 }
11516
Owen Taylor3473f882001-02-23 17:55:21 +000011517 /*
11518 * Parse a possible text declaration first
11519 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011520 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011521 xmlParseTextDecl(ctxt);
11522 }
11523
11524 /*
11525 * Doing validity checking on chunk doesn't make sense
11526 */
11527 ctxt->instate = XML_PARSER_CONTENT;
11528 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011529 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011530 ctxt->loadsubset = ctx->loadsubset;
11531 ctxt->depth = ctx->depth + 1;
11532 ctxt->replaceEntities = ctx->replaceEntities;
11533 if (ctxt->validate) {
11534 ctxt->vctxt.error = ctx->vctxt.error;
11535 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011536 } else {
11537 ctxt->vctxt.error = NULL;
11538 ctxt->vctxt.warning = NULL;
11539 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011540 ctxt->vctxt.nodeTab = NULL;
11541 ctxt->vctxt.nodeNr = 0;
11542 ctxt->vctxt.nodeMax = 0;
11543 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011544 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11545 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011546 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11547 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11548 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011549 ctxt->dictNames = ctx->dictNames;
11550 ctxt->attsDefault = ctx->attsDefault;
11551 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011552 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011553
11554 xmlParseContent(ctxt);
11555
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011556 ctx->validate = ctxt->validate;
11557 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011558 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011559 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011560 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011561 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011562 }
11563 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011564 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011565 }
11566
11567 if (!ctxt->wellFormed) {
11568 if (ctxt->errNo == 0)
11569 ret = 1;
11570 else
11571 ret = ctxt->errNo;
11572 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011573 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011574 xmlNodePtr cur;
11575
11576 /*
11577 * Return the newly created nodeset after unlinking it from
11578 * they pseudo parent.
11579 */
11580 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011581 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011582 while (cur != NULL) {
11583 cur->parent = NULL;
11584 cur = cur->next;
11585 }
11586 newDoc->children->children = NULL;
11587 }
11588 ret = 0;
11589 }
11590 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011591 ctxt->dict = NULL;
11592 ctxt->attsDefault = NULL;
11593 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011594 xmlFreeParserCtxt(ctxt);
11595 newDoc->intSubset = NULL;
11596 newDoc->extSubset = NULL;
11597 xmlFreeDoc(newDoc);
11598
11599 return(ret);
11600}
11601
11602/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011603 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011604 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011605 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011606 * @sax: the SAX handler bloc (possibly NULL)
11607 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11608 * @depth: Used for loop detection, use 0
11609 * @URL: the URL for the entity to load
11610 * @ID: the System ID for the entity to load
11611 * @list: the return value for the set of parsed nodes
11612 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011613 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011614 *
11615 * Returns 0 if the entity is well formed, -1 in case of args problem and
11616 * the parser error code otherwise
11617 */
11618
Daniel Veillard7d515752003-09-26 19:12:37 +000011619static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011620xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11621 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011622 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011623 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011624 xmlParserCtxtPtr ctxt;
11625 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011626 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011627 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011628 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011629 xmlChar start[4];
11630 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011631
11632 if (depth > 40) {
11633 return(XML_ERR_ENTITY_LOOP);
11634 }
11635
11636
11637
11638 if (list != NULL)
11639 *list = NULL;
11640 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011641 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011642 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011643 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011644
11645
11646 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011647 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011648 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011649 if (oldctxt != NULL) {
11650 ctxt->_private = oldctxt->_private;
11651 ctxt->loadsubset = oldctxt->loadsubset;
11652 ctxt->validate = oldctxt->validate;
11653 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011654 ctxt->record_info = oldctxt->record_info;
11655 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11656 ctxt->node_seq.length = oldctxt->node_seq.length;
11657 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011658 } else {
11659 /*
11660 * Doing validity checking on chunk without context
11661 * doesn't make sense
11662 */
11663 ctxt->_private = NULL;
11664 ctxt->validate = 0;
11665 ctxt->external = 2;
11666 ctxt->loadsubset = 0;
11667 }
Owen Taylor3473f882001-02-23 17:55:21 +000011668 if (sax != NULL) {
11669 oldsax = ctxt->sax;
11670 ctxt->sax = sax;
11671 if (user_data != NULL)
11672 ctxt->userData = user_data;
11673 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011674 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011675 newDoc = xmlNewDoc(BAD_CAST "1.0");
11676 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011677 ctxt->node_seq.maximum = 0;
11678 ctxt->node_seq.length = 0;
11679 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011680 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011681 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011682 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011683 newDoc->intSubset = doc->intSubset;
11684 newDoc->extSubset = doc->extSubset;
11685 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011686 xmlDictReference(newDoc->dict);
11687
Owen Taylor3473f882001-02-23 17:55:21 +000011688 if (doc->URL != NULL) {
11689 newDoc->URL = xmlStrdup(doc->URL);
11690 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011691 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11692 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011693 if (sax != NULL)
11694 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011695 ctxt->node_seq.maximum = 0;
11696 ctxt->node_seq.length = 0;
11697 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011698 xmlFreeParserCtxt(ctxt);
11699 newDoc->intSubset = NULL;
11700 newDoc->extSubset = NULL;
11701 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011702 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011703 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011704 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011705 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011706 ctxt->myDoc = doc;
11707 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011708
Daniel Veillard87a764e2001-06-20 17:41:10 +000011709 /*
11710 * Get the 4 first bytes and decode the charset
11711 * if enc != XML_CHAR_ENCODING_NONE
11712 * plug some encoding conversion routines.
11713 */
11714 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011715 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11716 start[0] = RAW;
11717 start[1] = NXT(1);
11718 start[2] = NXT(2);
11719 start[3] = NXT(3);
11720 enc = xmlDetectCharEncoding(start, 4);
11721 if (enc != XML_CHAR_ENCODING_NONE) {
11722 xmlSwitchEncoding(ctxt, enc);
11723 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011724 }
11725
Owen Taylor3473f882001-02-23 17:55:21 +000011726 /*
11727 * Parse a possible text declaration first
11728 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011729 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011730 xmlParseTextDecl(ctxt);
11731 }
11732
Owen Taylor3473f882001-02-23 17:55:21 +000011733 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011734 ctxt->depth = depth;
11735
11736 xmlParseContent(ctxt);
11737
Daniel Veillard561b7f82002-03-20 21:55:57 +000011738 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011739 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011740 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011741 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011742 }
11743 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011744 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011745 }
11746
11747 if (!ctxt->wellFormed) {
11748 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011749 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011750 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011751 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011752 } else {
11753 if (list != NULL) {
11754 xmlNodePtr cur;
11755
11756 /*
11757 * Return the newly created nodeset after unlinking it from
11758 * they pseudo parent.
11759 */
11760 cur = newDoc->children->children;
11761 *list = cur;
11762 while (cur != NULL) {
11763 cur->parent = NULL;
11764 cur = cur->next;
11765 }
11766 newDoc->children->children = NULL;
11767 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011768 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011769 }
11770 if (sax != NULL)
11771 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011772 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11773 oldctxt->node_seq.length = ctxt->node_seq.length;
11774 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011775 ctxt->node_seq.maximum = 0;
11776 ctxt->node_seq.length = 0;
11777 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011778 xmlFreeParserCtxt(ctxt);
11779 newDoc->intSubset = NULL;
11780 newDoc->extSubset = NULL;
11781 xmlFreeDoc(newDoc);
11782
11783 return(ret);
11784}
11785
Daniel Veillard81273902003-09-30 00:43:48 +000011786#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011787/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011788 * xmlParseExternalEntity:
11789 * @doc: the document the chunk pertains to
11790 * @sax: the SAX handler bloc (possibly NULL)
11791 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11792 * @depth: Used for loop detection, use 0
11793 * @URL: the URL for the entity to load
11794 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011795 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011796 *
11797 * Parse an external general entity
11798 * An external general parsed entity is well-formed if it matches the
11799 * production labeled extParsedEnt.
11800 *
11801 * [78] extParsedEnt ::= TextDecl? content
11802 *
11803 * Returns 0 if the entity is well formed, -1 in case of args problem and
11804 * the parser error code otherwise
11805 */
11806
11807int
11808xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011809 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011810 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011811 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011812}
11813
11814/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011815 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011816 * @doc: the document the chunk pertains to
11817 * @sax: the SAX handler bloc (possibly NULL)
11818 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11819 * @depth: Used for loop detection, use 0
11820 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011821 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011822 *
11823 * Parse a well-balanced chunk of an XML document
11824 * called by the parser
11825 * The allowed sequence for the Well Balanced Chunk is the one defined by
11826 * the content production in the XML grammar:
11827 *
11828 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11829 *
11830 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11831 * the parser error code otherwise
11832 */
11833
11834int
11835xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011836 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011837 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11838 depth, string, lst, 0 );
11839}
Daniel Veillard81273902003-09-30 00:43:48 +000011840#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011841
11842/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011843 * xmlParseBalancedChunkMemoryInternal:
11844 * @oldctxt: the existing parsing context
11845 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11846 * @user_data: the user data field for the parser context
11847 * @lst: the return value for the set of parsed nodes
11848 *
11849 *
11850 * Parse a well-balanced chunk of an XML document
11851 * called by the parser
11852 * The allowed sequence for the Well Balanced Chunk is the one defined by
11853 * the content production in the XML grammar:
11854 *
11855 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11856 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011857 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11858 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011859 *
11860 * In case recover is set to 1, the nodelist will not be empty even if
11861 * the parsed chunk is not well balanced.
11862 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011863static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011864xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11865 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11866 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011867 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011868 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011869 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011870 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011871 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011872 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011873 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011874
11875 if (oldctxt->depth > 40) {
11876 return(XML_ERR_ENTITY_LOOP);
11877 }
11878
11879
11880 if (lst != NULL)
11881 *lst = NULL;
11882 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011883 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011884
11885 size = xmlStrlen(string);
11886
11887 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011888 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011889 if (user_data != NULL)
11890 ctxt->userData = user_data;
11891 else
11892 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011893 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11894 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011895 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11896 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11897 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011898
11899 oldsax = ctxt->sax;
11900 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011901 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011902 ctxt->replaceEntities = oldctxt->replaceEntities;
11903 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011904
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011905 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011906 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011907 newDoc = xmlNewDoc(BAD_CAST "1.0");
11908 if (newDoc == NULL) {
11909 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011910 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011911 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011912 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011913 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011914 newDoc->dict = ctxt->dict;
11915 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011916 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011917 } else {
11918 ctxt->myDoc = oldctxt->myDoc;
11919 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011920 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011921 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011922 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11923 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011924 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011925 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011926 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011927 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011928 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011929 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011930 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011931 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011932 ctxt->myDoc->children = NULL;
11933 ctxt->myDoc->last = NULL;
11934 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011935 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011936 ctxt->instate = XML_PARSER_CONTENT;
11937 ctxt->depth = oldctxt->depth + 1;
11938
Daniel Veillard328f48c2002-11-15 15:24:34 +000011939 ctxt->validate = 0;
11940 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011941 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11942 /*
11943 * ID/IDREF registration will be done in xmlValidateElement below
11944 */
11945 ctxt->loadsubset |= XML_SKIP_IDS;
11946 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011947 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011948 ctxt->attsDefault = oldctxt->attsDefault;
11949 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011950
Daniel Veillard68e9e742002-11-16 15:35:11 +000011951 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011952 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011953 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011954 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011955 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011956 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011957 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011958 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011959 }
11960
11961 if (!ctxt->wellFormed) {
11962 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011963 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011964 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011965 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011966 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011967 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011968 }
11969
William M. Brack7b9154b2003-09-27 19:23:50 +000011970 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011971 xmlNodePtr cur;
11972
11973 /*
11974 * Return the newly created nodeset after unlinking it from
11975 * they pseudo parent.
11976 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011977 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011978 *lst = cur;
11979 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011980#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011981 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11982 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11983 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011984 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11985 oldctxt->myDoc, cur);
11986 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011987#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011988 cur->parent = NULL;
11989 cur = cur->next;
11990 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011991 ctxt->myDoc->children->children = NULL;
11992 }
11993 if (ctxt->myDoc != NULL) {
11994 xmlFreeNode(ctxt->myDoc->children);
11995 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011996 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011997 }
11998
11999 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012000 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012001 ctxt->attsDefault = NULL;
12002 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012003 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012004 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012005 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012006 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012007
12008 return(ret);
12009}
12010
Daniel Veillard29b17482004-08-16 00:39:03 +000012011/**
12012 * xmlParseInNodeContext:
12013 * @node: the context node
12014 * @data: the input string
12015 * @datalen: the input string length in bytes
12016 * @options: a combination of xmlParserOption
12017 * @lst: the return value for the set of parsed nodes
12018 *
12019 * Parse a well-balanced chunk of an XML document
12020 * within the context (DTD, namespaces, etc ...) of the given node.
12021 *
12022 * The allowed sequence for the data is a Well Balanced Chunk defined by
12023 * the content production in the XML grammar:
12024 *
12025 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12026 *
12027 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12028 * error code otherwise
12029 */
12030xmlParserErrors
12031xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12032 int options, xmlNodePtr *lst) {
12033#ifdef SAX2
12034 xmlParserCtxtPtr ctxt;
12035 xmlDocPtr doc = NULL;
12036 xmlNodePtr fake, cur;
12037 int nsnr = 0;
12038
12039 xmlParserErrors ret = XML_ERR_OK;
12040
12041 /*
12042 * check all input parameters, grab the document
12043 */
12044 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12045 return(XML_ERR_INTERNAL_ERROR);
12046 switch (node->type) {
12047 case XML_ELEMENT_NODE:
12048 case XML_ATTRIBUTE_NODE:
12049 case XML_TEXT_NODE:
12050 case XML_CDATA_SECTION_NODE:
12051 case XML_ENTITY_REF_NODE:
12052 case XML_PI_NODE:
12053 case XML_COMMENT_NODE:
12054 case XML_DOCUMENT_NODE:
12055 case XML_HTML_DOCUMENT_NODE:
12056 break;
12057 default:
12058 return(XML_ERR_INTERNAL_ERROR);
12059
12060 }
12061 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12062 (node->type != XML_DOCUMENT_NODE) &&
12063 (node->type != XML_HTML_DOCUMENT_NODE))
12064 node = node->parent;
12065 if (node == NULL)
12066 return(XML_ERR_INTERNAL_ERROR);
12067 if (node->type == XML_ELEMENT_NODE)
12068 doc = node->doc;
12069 else
12070 doc = (xmlDocPtr) node;
12071 if (doc == NULL)
12072 return(XML_ERR_INTERNAL_ERROR);
12073
12074 /*
12075 * allocate a context and set-up everything not related to the
12076 * node position in the tree
12077 */
12078 if (doc->type == XML_DOCUMENT_NODE)
12079 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12080#ifdef LIBXML_HTML_ENABLED
12081 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12082 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12083#endif
12084 else
12085 return(XML_ERR_INTERNAL_ERROR);
12086
12087 if (ctxt == NULL)
12088 return(XML_ERR_NO_MEMORY);
12089 fake = xmlNewComment(NULL);
12090 if (fake == NULL) {
12091 xmlFreeParserCtxt(ctxt);
12092 return(XML_ERR_NO_MEMORY);
12093 }
12094 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012095
12096 /*
12097 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12098 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12099 * we must wait until the last moment to free the original one.
12100 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012101 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012102 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012103 xmlDictFree(ctxt->dict);
12104 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012105 } else
12106 options |= XML_PARSE_NODICT;
12107
12108 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012109 xmlDetectSAX2(ctxt);
12110 ctxt->myDoc = doc;
12111
12112 if (node->type == XML_ELEMENT_NODE) {
12113 nodePush(ctxt, node);
12114 /*
12115 * initialize the SAX2 namespaces stack
12116 */
12117 cur = node;
12118 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12119 xmlNsPtr ns = cur->nsDef;
12120 const xmlChar *iprefix, *ihref;
12121
12122 while (ns != NULL) {
12123 if (ctxt->dict) {
12124 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12125 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12126 } else {
12127 iprefix = ns->prefix;
12128 ihref = ns->href;
12129 }
12130
12131 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12132 nsPush(ctxt, iprefix, ihref);
12133 nsnr++;
12134 }
12135 ns = ns->next;
12136 }
12137 cur = cur->parent;
12138 }
12139 ctxt->instate = XML_PARSER_CONTENT;
12140 }
12141
12142 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12143 /*
12144 * ID/IDREF registration will be done in xmlValidateElement below
12145 */
12146 ctxt->loadsubset |= XML_SKIP_IDS;
12147 }
12148
Daniel Veillard499cc922006-01-18 17:22:35 +000012149#ifdef LIBXML_HTML_ENABLED
12150 if (doc->type == XML_HTML_DOCUMENT_NODE)
12151 __htmlParseContent(ctxt);
12152 else
12153#endif
12154 xmlParseContent(ctxt);
12155
Daniel Veillard29b17482004-08-16 00:39:03 +000012156 nsPop(ctxt, nsnr);
12157 if ((RAW == '<') && (NXT(1) == '/')) {
12158 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12159 } else if (RAW != 0) {
12160 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12161 }
12162 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12164 ctxt->wellFormed = 0;
12165 }
12166
12167 if (!ctxt->wellFormed) {
12168 if (ctxt->errNo == 0)
12169 ret = XML_ERR_INTERNAL_ERROR;
12170 else
12171 ret = (xmlParserErrors)ctxt->errNo;
12172 } else {
12173 ret = XML_ERR_OK;
12174 }
12175
12176 /*
12177 * Return the newly created nodeset after unlinking it from
12178 * the pseudo sibling.
12179 */
12180
12181 cur = fake->next;
12182 fake->next = NULL;
12183 node->last = fake;
12184
12185 if (cur != NULL) {
12186 cur->prev = NULL;
12187 }
12188
12189 *lst = cur;
12190
12191 while (cur != NULL) {
12192 cur->parent = NULL;
12193 cur = cur->next;
12194 }
12195
12196 xmlUnlinkNode(fake);
12197 xmlFreeNode(fake);
12198
12199
12200 if (ret != XML_ERR_OK) {
12201 xmlFreeNodeList(*lst);
12202 *lst = NULL;
12203 }
William M. Brackc3f81342004-10-03 01:22:44 +000012204
William M. Brackb7b54de2004-10-06 16:38:01 +000012205 if (doc->dict != NULL)
12206 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012207 xmlFreeParserCtxt(ctxt);
12208
12209 return(ret);
12210#else /* !SAX2 */
12211 return(XML_ERR_INTERNAL_ERROR);
12212#endif
12213}
12214
Daniel Veillard81273902003-09-30 00:43:48 +000012215#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012216/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012217 * xmlParseBalancedChunkMemoryRecover:
12218 * @doc: the document the chunk pertains to
12219 * @sax: the SAX handler bloc (possibly NULL)
12220 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12221 * @depth: Used for loop detection, use 0
12222 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12223 * @lst: the return value for the set of parsed nodes
12224 * @recover: return nodes even if the data is broken (use 0)
12225 *
12226 *
12227 * Parse a well-balanced chunk of an XML document
12228 * called by the parser
12229 * The allowed sequence for the Well Balanced Chunk is the one defined by
12230 * the content production in the XML grammar:
12231 *
12232 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12233 *
12234 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12235 * the parser error code otherwise
12236 *
12237 * In case recover is set to 1, the nodelist will not be empty even if
12238 * the parsed chunk is not well balanced.
12239 */
12240int
12241xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12242 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12243 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012244 xmlParserCtxtPtr ctxt;
12245 xmlDocPtr newDoc;
12246 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012247 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012248 int size;
12249 int ret = 0;
12250
12251 if (depth > 40) {
12252 return(XML_ERR_ENTITY_LOOP);
12253 }
12254
12255
Daniel Veillardcda96922001-08-21 10:56:31 +000012256 if (lst != NULL)
12257 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012258 if (string == NULL)
12259 return(-1);
12260
12261 size = xmlStrlen(string);
12262
12263 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12264 if (ctxt == NULL) return(-1);
12265 ctxt->userData = ctxt;
12266 if (sax != NULL) {
12267 oldsax = ctxt->sax;
12268 ctxt->sax = sax;
12269 if (user_data != NULL)
12270 ctxt->userData = user_data;
12271 }
12272 newDoc = xmlNewDoc(BAD_CAST "1.0");
12273 if (newDoc == NULL) {
12274 xmlFreeParserCtxt(ctxt);
12275 return(-1);
12276 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012277 if ((doc != NULL) && (doc->dict != NULL)) {
12278 xmlDictFree(ctxt->dict);
12279 ctxt->dict = doc->dict;
12280 xmlDictReference(ctxt->dict);
12281 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12282 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12283 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12284 ctxt->dictNames = 1;
12285 } else {
12286 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12287 }
Owen Taylor3473f882001-02-23 17:55:21 +000012288 if (doc != NULL) {
12289 newDoc->intSubset = doc->intSubset;
12290 newDoc->extSubset = doc->extSubset;
12291 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012292 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12293 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012294 if (sax != NULL)
12295 ctxt->sax = oldsax;
12296 xmlFreeParserCtxt(ctxt);
12297 newDoc->intSubset = NULL;
12298 newDoc->extSubset = NULL;
12299 xmlFreeDoc(newDoc);
12300 return(-1);
12301 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012302 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12303 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012304 if (doc == NULL) {
12305 ctxt->myDoc = newDoc;
12306 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012307 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012308 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012309 /* Ensure that doc has XML spec namespace */
12310 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12311 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012312 }
12313 ctxt->instate = XML_PARSER_CONTENT;
12314 ctxt->depth = depth;
12315
12316 /*
12317 * Doing validity checking on chunk doesn't make sense
12318 */
12319 ctxt->validate = 0;
12320 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012321 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012322
Daniel Veillardb39bc392002-10-26 19:29:51 +000012323 if ( doc != NULL ){
12324 content = doc->children;
12325 doc->children = NULL;
12326 xmlParseContent(ctxt);
12327 doc->children = content;
12328 }
12329 else {
12330 xmlParseContent(ctxt);
12331 }
Owen Taylor3473f882001-02-23 17:55:21 +000012332 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012333 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012334 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012335 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012336 }
12337 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012338 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012339 }
12340
12341 if (!ctxt->wellFormed) {
12342 if (ctxt->errNo == 0)
12343 ret = 1;
12344 else
12345 ret = ctxt->errNo;
12346 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012347 ret = 0;
12348 }
12349
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012350 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12351 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012352
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012353 /*
12354 * Return the newly created nodeset after unlinking it from
12355 * they pseudo parent.
12356 */
12357 cur = newDoc->children->children;
12358 *lst = cur;
12359 while (cur != NULL) {
12360 xmlSetTreeDoc(cur, doc);
12361 cur->parent = NULL;
12362 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012363 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012364 newDoc->children->children = NULL;
12365 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012366
Owen Taylor3473f882001-02-23 17:55:21 +000012367 if (sax != NULL)
12368 ctxt->sax = oldsax;
12369 xmlFreeParserCtxt(ctxt);
12370 newDoc->intSubset = NULL;
12371 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012372 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012373 xmlFreeDoc(newDoc);
12374
12375 return(ret);
12376}
12377
12378/**
12379 * xmlSAXParseEntity:
12380 * @sax: the SAX handler block
12381 * @filename: the filename
12382 *
12383 * parse an XML external entity out of context and build a tree.
12384 * It use the given SAX function block to handle the parsing callback.
12385 * If sax is NULL, fallback to the default DOM tree building routines.
12386 *
12387 * [78] extParsedEnt ::= TextDecl? content
12388 *
12389 * This correspond to a "Well Balanced" chunk
12390 *
12391 * Returns the resulting document tree
12392 */
12393
12394xmlDocPtr
12395xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12396 xmlDocPtr ret;
12397 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012398
12399 ctxt = xmlCreateFileParserCtxt(filename);
12400 if (ctxt == NULL) {
12401 return(NULL);
12402 }
12403 if (sax != NULL) {
12404 if (ctxt->sax != NULL)
12405 xmlFree(ctxt->sax);
12406 ctxt->sax = sax;
12407 ctxt->userData = NULL;
12408 }
12409
Owen Taylor3473f882001-02-23 17:55:21 +000012410 xmlParseExtParsedEnt(ctxt);
12411
12412 if (ctxt->wellFormed)
12413 ret = ctxt->myDoc;
12414 else {
12415 ret = NULL;
12416 xmlFreeDoc(ctxt->myDoc);
12417 ctxt->myDoc = NULL;
12418 }
12419 if (sax != NULL)
12420 ctxt->sax = NULL;
12421 xmlFreeParserCtxt(ctxt);
12422
12423 return(ret);
12424}
12425
12426/**
12427 * xmlParseEntity:
12428 * @filename: the filename
12429 *
12430 * parse an XML external entity out of context and build a tree.
12431 *
12432 * [78] extParsedEnt ::= TextDecl? content
12433 *
12434 * This correspond to a "Well Balanced" chunk
12435 *
12436 * Returns the resulting document tree
12437 */
12438
12439xmlDocPtr
12440xmlParseEntity(const char *filename) {
12441 return(xmlSAXParseEntity(NULL, filename));
12442}
Daniel Veillard81273902003-09-30 00:43:48 +000012443#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012444
12445/**
12446 * xmlCreateEntityParserCtxt:
12447 * @URL: the entity URL
12448 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012449 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012450 *
12451 * Create a parser context for an external entity
12452 * Automatic support for ZLIB/Compress compressed document is provided
12453 * by default if found at compile-time.
12454 *
12455 * Returns the new parser context or NULL
12456 */
12457xmlParserCtxtPtr
12458xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12459 const xmlChar *base) {
12460 xmlParserCtxtPtr ctxt;
12461 xmlParserInputPtr inputStream;
12462 char *directory = NULL;
12463 xmlChar *uri;
12464
12465 ctxt = xmlNewParserCtxt();
12466 if (ctxt == NULL) {
12467 return(NULL);
12468 }
12469
12470 uri = xmlBuildURI(URL, base);
12471
12472 if (uri == NULL) {
12473 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12474 if (inputStream == NULL) {
12475 xmlFreeParserCtxt(ctxt);
12476 return(NULL);
12477 }
12478
12479 inputPush(ctxt, inputStream);
12480
12481 if ((ctxt->directory == NULL) && (directory == NULL))
12482 directory = xmlParserGetDirectory((char *)URL);
12483 if ((ctxt->directory == NULL) && (directory != NULL))
12484 ctxt->directory = directory;
12485 } else {
12486 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12487 if (inputStream == NULL) {
12488 xmlFree(uri);
12489 xmlFreeParserCtxt(ctxt);
12490 return(NULL);
12491 }
12492
12493 inputPush(ctxt, inputStream);
12494
12495 if ((ctxt->directory == NULL) && (directory == NULL))
12496 directory = xmlParserGetDirectory((char *)uri);
12497 if ((ctxt->directory == NULL) && (directory != NULL))
12498 ctxt->directory = directory;
12499 xmlFree(uri);
12500 }
Owen Taylor3473f882001-02-23 17:55:21 +000012501 return(ctxt);
12502}
12503
12504/************************************************************************
12505 * *
12506 * Front ends when parsing from a file *
12507 * *
12508 ************************************************************************/
12509
12510/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012511 * xmlCreateURLParserCtxt:
12512 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012513 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012514 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012515 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012516 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012517 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012518 *
12519 * Returns the new parser context or NULL
12520 */
12521xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012522xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012523{
12524 xmlParserCtxtPtr ctxt;
12525 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012526 char *directory = NULL;
12527
Owen Taylor3473f882001-02-23 17:55:21 +000012528 ctxt = xmlNewParserCtxt();
12529 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012530 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012531 return(NULL);
12532 }
12533
Daniel Veillarddf292f72005-01-16 19:00:15 +000012534 if (options)
12535 xmlCtxtUseOptions(ctxt, options);
12536 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012537
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012538 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012539 if (inputStream == NULL) {
12540 xmlFreeParserCtxt(ctxt);
12541 return(NULL);
12542 }
12543
Owen Taylor3473f882001-02-23 17:55:21 +000012544 inputPush(ctxt, inputStream);
12545 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012546 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012547 if ((ctxt->directory == NULL) && (directory != NULL))
12548 ctxt->directory = directory;
12549
12550 return(ctxt);
12551}
12552
Daniel Veillard61b93382003-11-03 14:28:31 +000012553/**
12554 * xmlCreateFileParserCtxt:
12555 * @filename: the filename
12556 *
12557 * Create a parser context for a file content.
12558 * Automatic support for ZLIB/Compress compressed document is provided
12559 * by default if found at compile-time.
12560 *
12561 * Returns the new parser context or NULL
12562 */
12563xmlParserCtxtPtr
12564xmlCreateFileParserCtxt(const char *filename)
12565{
12566 return(xmlCreateURLParserCtxt(filename, 0));
12567}
12568
Daniel Veillard81273902003-09-30 00:43:48 +000012569#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012570/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012571 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012572 * @sax: the SAX handler block
12573 * @filename: the filename
12574 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12575 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012576 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012577 *
12578 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12579 * compressed document is provided by default if found at compile-time.
12580 * It use the given SAX function block to handle the parsing callback.
12581 * If sax is NULL, fallback to the default DOM tree building routines.
12582 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012583 * User data (void *) is stored within the parser context in the
12584 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012585 *
Owen Taylor3473f882001-02-23 17:55:21 +000012586 * Returns the resulting document tree
12587 */
12588
12589xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012590xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12591 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012592 xmlDocPtr ret;
12593 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012594
Daniel Veillard635ef722001-10-29 11:48:19 +000012595 xmlInitParser();
12596
Owen Taylor3473f882001-02-23 17:55:21 +000012597 ctxt = xmlCreateFileParserCtxt(filename);
12598 if (ctxt == NULL) {
12599 return(NULL);
12600 }
12601 if (sax != NULL) {
12602 if (ctxt->sax != NULL)
12603 xmlFree(ctxt->sax);
12604 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012605 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012606 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012607 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012608 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012609 }
Owen Taylor3473f882001-02-23 17:55:21 +000012610
Daniel Veillard37d2d162008-03-14 10:54:00 +000012611 if (ctxt->directory == NULL)
12612 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012613
Daniel Veillarddad3f682002-11-17 16:47:27 +000012614 ctxt->recovery = recovery;
12615
Owen Taylor3473f882001-02-23 17:55:21 +000012616 xmlParseDocument(ctxt);
12617
William M. Brackc07329e2003-09-08 01:57:30 +000012618 if ((ctxt->wellFormed) || recovery) {
12619 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012620 if (ret != NULL) {
12621 if (ctxt->input->buf->compressed > 0)
12622 ret->compression = 9;
12623 else
12624 ret->compression = ctxt->input->buf->compressed;
12625 }
William M. Brackc07329e2003-09-08 01:57:30 +000012626 }
Owen Taylor3473f882001-02-23 17:55:21 +000012627 else {
12628 ret = NULL;
12629 xmlFreeDoc(ctxt->myDoc);
12630 ctxt->myDoc = NULL;
12631 }
12632 if (sax != NULL)
12633 ctxt->sax = NULL;
12634 xmlFreeParserCtxt(ctxt);
12635
12636 return(ret);
12637}
12638
12639/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012640 * xmlSAXParseFile:
12641 * @sax: the SAX handler block
12642 * @filename: the filename
12643 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12644 * documents
12645 *
12646 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12647 * compressed document is provided by default if found at compile-time.
12648 * It use the given SAX function block to handle the parsing callback.
12649 * If sax is NULL, fallback to the default DOM tree building routines.
12650 *
12651 * Returns the resulting document tree
12652 */
12653
12654xmlDocPtr
12655xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12656 int recovery) {
12657 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12658}
12659
12660/**
Owen Taylor3473f882001-02-23 17:55:21 +000012661 * xmlRecoverDoc:
12662 * @cur: a pointer to an array of xmlChar
12663 *
12664 * parse an XML in-memory document and build a tree.
12665 * In the case the document is not Well Formed, a tree is built anyway
12666 *
12667 * Returns the resulting document tree
12668 */
12669
12670xmlDocPtr
12671xmlRecoverDoc(xmlChar *cur) {
12672 return(xmlSAXParseDoc(NULL, cur, 1));
12673}
12674
12675/**
12676 * xmlParseFile:
12677 * @filename: the filename
12678 *
12679 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12680 * compressed document is provided by default if found at compile-time.
12681 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012682 * Returns the resulting document tree if the file was wellformed,
12683 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012684 */
12685
12686xmlDocPtr
12687xmlParseFile(const char *filename) {
12688 return(xmlSAXParseFile(NULL, filename, 0));
12689}
12690
12691/**
12692 * xmlRecoverFile:
12693 * @filename: the filename
12694 *
12695 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12696 * compressed document is provided by default if found at compile-time.
12697 * In the case the document is not Well Formed, a tree is built anyway
12698 *
12699 * Returns the resulting document tree
12700 */
12701
12702xmlDocPtr
12703xmlRecoverFile(const char *filename) {
12704 return(xmlSAXParseFile(NULL, filename, 1));
12705}
12706
12707
12708/**
12709 * xmlSetupParserForBuffer:
12710 * @ctxt: an XML parser context
12711 * @buffer: a xmlChar * buffer
12712 * @filename: a file name
12713 *
12714 * Setup the parser context to parse a new buffer; Clears any prior
12715 * contents from the parser context. The buffer parameter must not be
12716 * NULL, but the filename parameter can be
12717 */
12718void
12719xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12720 const char* filename)
12721{
12722 xmlParserInputPtr input;
12723
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012724 if ((ctxt == NULL) || (buffer == NULL))
12725 return;
12726
Owen Taylor3473f882001-02-23 17:55:21 +000012727 input = xmlNewInputStream(ctxt);
12728 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012729 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012730 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012731 return;
12732 }
12733
12734 xmlClearParserCtxt(ctxt);
12735 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012736 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012737 input->base = buffer;
12738 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012739 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012740 inputPush(ctxt, input);
12741}
12742
12743/**
12744 * xmlSAXUserParseFile:
12745 * @sax: a SAX handler
12746 * @user_data: The user data returned on SAX callbacks
12747 * @filename: a file name
12748 *
12749 * parse an XML file and call the given SAX handler routines.
12750 * Automatic support for ZLIB/Compress compressed document is provided
12751 *
12752 * Returns 0 in case of success or a error number otherwise
12753 */
12754int
12755xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12756 const char *filename) {
12757 int ret = 0;
12758 xmlParserCtxtPtr ctxt;
12759
12760 ctxt = xmlCreateFileParserCtxt(filename);
12761 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012762 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012763 xmlFree(ctxt->sax);
12764 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012765 xmlDetectSAX2(ctxt);
12766
Owen Taylor3473f882001-02-23 17:55:21 +000012767 if (user_data != NULL)
12768 ctxt->userData = user_data;
12769
12770 xmlParseDocument(ctxt);
12771
12772 if (ctxt->wellFormed)
12773 ret = 0;
12774 else {
12775 if (ctxt->errNo != 0)
12776 ret = ctxt->errNo;
12777 else
12778 ret = -1;
12779 }
12780 if (sax != NULL)
12781 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012782 if (ctxt->myDoc != NULL) {
12783 xmlFreeDoc(ctxt->myDoc);
12784 ctxt->myDoc = NULL;
12785 }
Owen Taylor3473f882001-02-23 17:55:21 +000012786 xmlFreeParserCtxt(ctxt);
12787
12788 return ret;
12789}
Daniel Veillard81273902003-09-30 00:43:48 +000012790#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012791
12792/************************************************************************
12793 * *
12794 * Front ends when parsing from memory *
12795 * *
12796 ************************************************************************/
12797
12798/**
12799 * xmlCreateMemoryParserCtxt:
12800 * @buffer: a pointer to a char array
12801 * @size: the size of the array
12802 *
12803 * Create a parser context for an XML in-memory document.
12804 *
12805 * Returns the new parser context or NULL
12806 */
12807xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012808xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012809 xmlParserCtxtPtr ctxt;
12810 xmlParserInputPtr input;
12811 xmlParserInputBufferPtr buf;
12812
12813 if (buffer == NULL)
12814 return(NULL);
12815 if (size <= 0)
12816 return(NULL);
12817
12818 ctxt = xmlNewParserCtxt();
12819 if (ctxt == NULL)
12820 return(NULL);
12821
Daniel Veillard53350552003-09-18 13:35:51 +000012822 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012823 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012824 if (buf == NULL) {
12825 xmlFreeParserCtxt(ctxt);
12826 return(NULL);
12827 }
Owen Taylor3473f882001-02-23 17:55:21 +000012828
12829 input = xmlNewInputStream(ctxt);
12830 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012831 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012832 xmlFreeParserCtxt(ctxt);
12833 return(NULL);
12834 }
12835
12836 input->filename = NULL;
12837 input->buf = buf;
12838 input->base = input->buf->buffer->content;
12839 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012840 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012841
12842 inputPush(ctxt, input);
12843 return(ctxt);
12844}
12845
Daniel Veillard81273902003-09-30 00:43:48 +000012846#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012847/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012848 * xmlSAXParseMemoryWithData:
12849 * @sax: the SAX handler block
12850 * @buffer: an pointer to a char array
12851 * @size: the size of the array
12852 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12853 * documents
12854 * @data: the userdata
12855 *
12856 * parse an XML in-memory block and use the given SAX function block
12857 * to handle the parsing callback. If sax is NULL, fallback to the default
12858 * DOM tree building routines.
12859 *
12860 * User data (void *) is stored within the parser context in the
12861 * context's _private member, so it is available nearly everywhere in libxml
12862 *
12863 * Returns the resulting document tree
12864 */
12865
12866xmlDocPtr
12867xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12868 int size, int recovery, void *data) {
12869 xmlDocPtr ret;
12870 xmlParserCtxtPtr ctxt;
12871
12872 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12873 if (ctxt == NULL) return(NULL);
12874 if (sax != NULL) {
12875 if (ctxt->sax != NULL)
12876 xmlFree(ctxt->sax);
12877 ctxt->sax = sax;
12878 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012879 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012880 if (data!=NULL) {
12881 ctxt->_private=data;
12882 }
12883
Daniel Veillardadba5f12003-04-04 16:09:01 +000012884 ctxt->recovery = recovery;
12885
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012886 xmlParseDocument(ctxt);
12887
12888 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12889 else {
12890 ret = NULL;
12891 xmlFreeDoc(ctxt->myDoc);
12892 ctxt->myDoc = NULL;
12893 }
12894 if (sax != NULL)
12895 ctxt->sax = NULL;
12896 xmlFreeParserCtxt(ctxt);
12897
12898 return(ret);
12899}
12900
12901/**
Owen Taylor3473f882001-02-23 17:55:21 +000012902 * xmlSAXParseMemory:
12903 * @sax: the SAX handler block
12904 * @buffer: an pointer to a char array
12905 * @size: the size of the array
12906 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12907 * documents
12908 *
12909 * parse an XML in-memory block and use the given SAX function block
12910 * to handle the parsing callback. If sax is NULL, fallback to the default
12911 * DOM tree building routines.
12912 *
12913 * Returns the resulting document tree
12914 */
12915xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012916xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12917 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012918 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012919}
12920
12921/**
12922 * xmlParseMemory:
12923 * @buffer: an pointer to a char array
12924 * @size: the size of the array
12925 *
12926 * parse an XML in-memory block and build a tree.
12927 *
12928 * Returns the resulting document tree
12929 */
12930
Daniel Veillard50822cb2001-07-26 20:05:51 +000012931xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012932 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12933}
12934
12935/**
12936 * xmlRecoverMemory:
12937 * @buffer: an pointer to a char array
12938 * @size: the size of the array
12939 *
12940 * parse an XML in-memory block and build a tree.
12941 * In the case the document is not Well Formed, a tree is built anyway
12942 *
12943 * Returns the resulting document tree
12944 */
12945
Daniel Veillard50822cb2001-07-26 20:05:51 +000012946xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012947 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12948}
12949
12950/**
12951 * xmlSAXUserParseMemory:
12952 * @sax: a SAX handler
12953 * @user_data: The user data returned on SAX callbacks
12954 * @buffer: an in-memory XML document input
12955 * @size: the length of the XML document in bytes
12956 *
12957 * A better SAX parsing routine.
12958 * parse an XML in-memory buffer and call the given SAX handler routines.
12959 *
12960 * Returns 0 in case of success or a error number otherwise
12961 */
12962int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012963 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012964 int ret = 0;
12965 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012966
12967 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12968 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012969 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12970 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000012971 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012972 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012973
Daniel Veillard30211a02001-04-26 09:33:18 +000012974 if (user_data != NULL)
12975 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012976
12977 xmlParseDocument(ctxt);
12978
12979 if (ctxt->wellFormed)
12980 ret = 0;
12981 else {
12982 if (ctxt->errNo != 0)
12983 ret = ctxt->errNo;
12984 else
12985 ret = -1;
12986 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012987 if (sax != NULL)
12988 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012989 if (ctxt->myDoc != NULL) {
12990 xmlFreeDoc(ctxt->myDoc);
12991 ctxt->myDoc = NULL;
12992 }
Owen Taylor3473f882001-02-23 17:55:21 +000012993 xmlFreeParserCtxt(ctxt);
12994
12995 return ret;
12996}
Daniel Veillard81273902003-09-30 00:43:48 +000012997#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012998
12999/**
13000 * xmlCreateDocParserCtxt:
13001 * @cur: a pointer to an array of xmlChar
13002 *
13003 * Creates a parser context for an XML in-memory document.
13004 *
13005 * Returns the new parser context or NULL
13006 */
13007xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013009 int len;
13010
13011 if (cur == NULL)
13012 return(NULL);
13013 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013014 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013015}
13016
Daniel Veillard81273902003-09-30 00:43:48 +000013017#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013018/**
13019 * xmlSAXParseDoc:
13020 * @sax: the SAX handler block
13021 * @cur: a pointer to an array of xmlChar
13022 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13023 * documents
13024 *
13025 * parse an XML in-memory document and build a tree.
13026 * It use the given SAX function block to handle the parsing callback.
13027 * If sax is NULL, fallback to the default DOM tree building routines.
13028 *
13029 * Returns the resulting document tree
13030 */
13031
13032xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013033xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013034 xmlDocPtr ret;
13035 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013036 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013037
Daniel Veillard38936062004-11-04 17:45:11 +000013038 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013039
13040
13041 ctxt = xmlCreateDocParserCtxt(cur);
13042 if (ctxt == NULL) return(NULL);
13043 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013044 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013045 ctxt->sax = sax;
13046 ctxt->userData = NULL;
13047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013048 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013049
13050 xmlParseDocument(ctxt);
13051 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13052 else {
13053 ret = NULL;
13054 xmlFreeDoc(ctxt->myDoc);
13055 ctxt->myDoc = NULL;
13056 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013057 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013058 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013059 xmlFreeParserCtxt(ctxt);
13060
13061 return(ret);
13062}
13063
13064/**
13065 * xmlParseDoc:
13066 * @cur: a pointer to an array of xmlChar
13067 *
13068 * parse an XML in-memory document and build a tree.
13069 *
13070 * Returns the resulting document tree
13071 */
13072
13073xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013074xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013075 return(xmlSAXParseDoc(NULL, cur, 0));
13076}
Daniel Veillard81273902003-09-30 00:43:48 +000013077#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013078
Daniel Veillard81273902003-09-30 00:43:48 +000013079#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013080/************************************************************************
13081 * *
13082 * Specific function to keep track of entities references *
13083 * and used by the XSLT debugger *
13084 * *
13085 ************************************************************************/
13086
13087static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13088
13089/**
13090 * xmlAddEntityReference:
13091 * @ent : A valid entity
13092 * @firstNode : A valid first node for children of entity
13093 * @lastNode : A valid last node of children entity
13094 *
13095 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13096 */
13097static void
13098xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13099 xmlNodePtr lastNode)
13100{
13101 if (xmlEntityRefFunc != NULL) {
13102 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13103 }
13104}
13105
13106
13107/**
13108 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013109 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013110 *
13111 * Set the function to call call back when a xml reference has been made
13112 */
13113void
13114xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13115{
13116 xmlEntityRefFunc = func;
13117}
Daniel Veillard81273902003-09-30 00:43:48 +000013118#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013119
13120/************************************************************************
13121 * *
13122 * Miscellaneous *
13123 * *
13124 ************************************************************************/
13125
13126#ifdef LIBXML_XPATH_ENABLED
13127#include <libxml/xpath.h>
13128#endif
13129
Daniel Veillardffa3c742005-07-21 13:24:09 +000013130extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013131static int xmlParserInitialized = 0;
13132
13133/**
13134 * xmlInitParser:
13135 *
13136 * Initialization function for the XML parser.
13137 * This is not reentrant. Call once before processing in case of
13138 * use in multithreaded programs.
13139 */
13140
13141void
13142xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013143 if (xmlParserInitialized != 0)
13144 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013145
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013146#ifdef LIBXML_THREAD_ENABLED
13147 __xmlGlobalInitMutexLock();
13148 if (xmlParserInitialized == 0) {
13149#endif
13150 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13151 (xmlGenericError == NULL))
13152 initGenericErrorDefaultFunc(NULL);
13153 xmlInitGlobals();
13154 xmlInitThreads();
13155 xmlInitMemory();
13156 xmlInitCharEncodingHandlers();
13157 xmlDefaultSAXHandlerInit();
13158 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013159#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013160 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013161#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013162#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013163 htmlInitAutoClose();
13164 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013165#endif
13166#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013167 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013168#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013169 xmlParserInitialized = 1;
13170#ifdef LIBXML_THREAD_ENABLED
13171 }
13172 __xmlGlobalInitMutexUnlock();
13173#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013174}
13175
13176/**
13177 * xmlCleanupParser:
13178 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000013179 * Cleanup function for the XML library. It tries to reclaim all
13180 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000013181 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000013182 * function should not prevent reusing the library but one should
13183 * call xmlCleanupParser() only when the process has
Daniel Veillardccc476f2008-03-04 13:19:49 +000013184 * finished using the library and all XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000013185 */
13186
13187void
13188xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013189 if (!xmlParserInitialized)
13190 return;
13191
Owen Taylor3473f882001-02-23 17:55:21 +000013192 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013193#ifdef LIBXML_CATALOG_ENABLED
13194 xmlCatalogCleanup();
13195#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013196 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013197 xmlCleanupInputCallbacks();
13198#ifdef LIBXML_OUTPUT_ENABLED
13199 xmlCleanupOutputCallbacks();
13200#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013201#ifdef LIBXML_SCHEMAS_ENABLED
13202 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013203 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013204#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013205 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013206 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013207 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013208 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013209 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013210}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013211
13212/************************************************************************
13213 * *
13214 * New set (2.6.0) of simpler and more flexible APIs *
13215 * *
13216 ************************************************************************/
13217
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * A variable named "dict" must be visible where the macro is expanded.
 * The string is released with xmlFree() when it is non-NULL and either
 * "dict" is NULL or xmlDictOwns() returns 0 for it.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement, including as the body of an if/else; the invocation
 * must be terminated with a semicolon. @str is evaluated more than once,
 * so it must not have side effects.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13229
13230/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013231 * xmlCtxtReset:
13232 * @ctxt: an XML parser context
13233 *
13234 * Reset a parser context
13235 */
13236void
13237xmlCtxtReset(xmlParserCtxtPtr ctxt)
13238{
13239 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013240 xmlDictPtr dict;
13241
13242 if (ctxt == NULL)
13243 return;
13244
13245 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246
13247 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13248 xmlFreeInputStream(input);
13249 }
13250 ctxt->inputNr = 0;
13251 ctxt->input = NULL;
13252
13253 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013254 if (ctxt->spaceTab != NULL) {
13255 ctxt->spaceTab[0] = -1;
13256 ctxt->space = &ctxt->spaceTab[0];
13257 } else {
13258 ctxt->space = NULL;
13259 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013260
13261
13262 ctxt->nodeNr = 0;
13263 ctxt->node = NULL;
13264
13265 ctxt->nameNr = 0;
13266 ctxt->name = NULL;
13267
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013268 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013269 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013270 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013271 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013272 DICT_FREE(ctxt->directory);
13273 ctxt->directory = NULL;
13274 DICT_FREE(ctxt->extSubURI);
13275 ctxt->extSubURI = NULL;
13276 DICT_FREE(ctxt->extSubSystem);
13277 ctxt->extSubSystem = NULL;
13278 if (ctxt->myDoc != NULL)
13279 xmlFreeDoc(ctxt->myDoc);
13280 ctxt->myDoc = NULL;
13281
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013282 ctxt->standalone = -1;
13283 ctxt->hasExternalSubset = 0;
13284 ctxt->hasPErefs = 0;
13285 ctxt->html = 0;
13286 ctxt->external = 0;
13287 ctxt->instate = XML_PARSER_START;
13288 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013289
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013290 ctxt->wellFormed = 1;
13291 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013292 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013293 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013294#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013295 ctxt->vctxt.userData = ctxt;
13296 ctxt->vctxt.error = xmlParserValidityError;
13297 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013298#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013299 ctxt->record_info = 0;
13300 ctxt->nbChars = 0;
13301 ctxt->checkIndex = 0;
13302 ctxt->inSubset = 0;
13303 ctxt->errNo = XML_ERR_OK;
13304 ctxt->depth = 0;
13305 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13306 ctxt->catalogs = NULL;
13307 xmlInitNodeInfoSeq(&ctxt->node_seq);
13308
13309 if (ctxt->attsDefault != NULL) {
13310 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13311 ctxt->attsDefault = NULL;
13312 }
13313 if (ctxt->attsSpecial != NULL) {
13314 xmlHashFree(ctxt->attsSpecial, NULL);
13315 ctxt->attsSpecial = NULL;
13316 }
13317
Daniel Veillard4432df22003-09-28 18:58:27 +000013318#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013319 if (ctxt->catalogs != NULL)
13320 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013321#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013322 if (ctxt->lastError.code != XML_ERR_OK)
13323 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013324}
13325
13326/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013327 * xmlCtxtResetPush:
13328 * @ctxt: an XML parser context
13329 * @chunk: a pointer to an array of chars
13330 * @size: number of chars in the array
13331 * @filename: an optional file name or URI
13332 * @encoding: the document encoding, or NULL
13333 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013334 * Reset a push parser context
13335 *
13336 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013337 */
13338int
13339xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13340 int size, const char *filename, const char *encoding)
13341{
13342 xmlParserInputPtr inputStream;
13343 xmlParserInputBufferPtr buf;
13344 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13345
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013346 if (ctxt == NULL)
13347 return(1);
13348
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013349 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13350 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13351
13352 buf = xmlAllocParserInputBuffer(enc);
13353 if (buf == NULL)
13354 return(1);
13355
13356 if (ctxt == NULL) {
13357 xmlFreeParserInputBuffer(buf);
13358 return(1);
13359 }
13360
13361 xmlCtxtReset(ctxt);
13362
13363 if (ctxt->pushTab == NULL) {
13364 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13365 sizeof(xmlChar *));
13366 if (ctxt->pushTab == NULL) {
13367 xmlErrMemory(ctxt, NULL);
13368 xmlFreeParserInputBuffer(buf);
13369 return(1);
13370 }
13371 }
13372
13373 if (filename == NULL) {
13374 ctxt->directory = NULL;
13375 } else {
13376 ctxt->directory = xmlParserGetDirectory(filename);
13377 }
13378
13379 inputStream = xmlNewInputStream(ctxt);
13380 if (inputStream == NULL) {
13381 xmlFreeParserInputBuffer(buf);
13382 return(1);
13383 }
13384
13385 if (filename == NULL)
13386 inputStream->filename = NULL;
13387 else
13388 inputStream->filename = (char *)
13389 xmlCanonicPath((const xmlChar *) filename);
13390 inputStream->buf = buf;
13391 inputStream->base = inputStream->buf->buffer->content;
13392 inputStream->cur = inputStream->buf->buffer->content;
13393 inputStream->end =
13394 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13395
13396 inputPush(ctxt, inputStream);
13397
13398 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13399 (ctxt->input->buf != NULL)) {
13400 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13401 int cur = ctxt->input->cur - ctxt->input->base;
13402
13403 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13404
13405 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13406 ctxt->input->cur = ctxt->input->base + cur;
13407 ctxt->input->end =
13408 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13409 use];
13410#ifdef DEBUG_PUSH
13411 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13412#endif
13413 }
13414
13415 if (encoding != NULL) {
13416 xmlCharEncodingHandlerPtr hdlr;
13417
13418 hdlr = xmlFindCharEncodingHandler(encoding);
13419 if (hdlr != NULL) {
13420 xmlSwitchToEncoding(ctxt, hdlr);
13421 } else {
13422 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13423 "Unsupported encoding %s\n", BAD_CAST encoding);
13424 }
13425 } else if (enc != XML_CHAR_ENCODING_NONE) {
13426 xmlSwitchEncoding(ctxt, enc);
13427 }
13428
13429 return(0);
13430}
13431
13432/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013433 * xmlCtxtUseOptions:
13434 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013435 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013436 *
13437 * Applies the options to the parser context
13438 *
13439 * Returns 0 in case of success, the set of unknown or unimplemented options
13440 * in case of error.
13441 */
13442int
13443xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13444{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013445 if (ctxt == NULL)
13446 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013447 if (options & XML_PARSE_RECOVER) {
13448 ctxt->recovery = 1;
13449 options -= XML_PARSE_RECOVER;
13450 } else
13451 ctxt->recovery = 0;
13452 if (options & XML_PARSE_DTDLOAD) {
13453 ctxt->loadsubset = XML_DETECT_IDS;
13454 options -= XML_PARSE_DTDLOAD;
13455 } else
13456 ctxt->loadsubset = 0;
13457 if (options & XML_PARSE_DTDATTR) {
13458 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13459 options -= XML_PARSE_DTDATTR;
13460 }
13461 if (options & XML_PARSE_NOENT) {
13462 ctxt->replaceEntities = 1;
13463 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13464 options -= XML_PARSE_NOENT;
13465 } else
13466 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013467 if (options & XML_PARSE_PEDANTIC) {
13468 ctxt->pedantic = 1;
13469 options -= XML_PARSE_PEDANTIC;
13470 } else
13471 ctxt->pedantic = 0;
13472 if (options & XML_PARSE_NOBLANKS) {
13473 ctxt->keepBlanks = 0;
13474 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13475 options -= XML_PARSE_NOBLANKS;
13476 } else
13477 ctxt->keepBlanks = 1;
13478 if (options & XML_PARSE_DTDVALID) {
13479 ctxt->validate = 1;
13480 if (options & XML_PARSE_NOWARNING)
13481 ctxt->vctxt.warning = NULL;
13482 if (options & XML_PARSE_NOERROR)
13483 ctxt->vctxt.error = NULL;
13484 options -= XML_PARSE_DTDVALID;
13485 } else
13486 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013487 if (options & XML_PARSE_NOWARNING) {
13488 ctxt->sax->warning = NULL;
13489 options -= XML_PARSE_NOWARNING;
13490 }
13491 if (options & XML_PARSE_NOERROR) {
13492 ctxt->sax->error = NULL;
13493 ctxt->sax->fatalError = NULL;
13494 options -= XML_PARSE_NOERROR;
13495 }
Daniel Veillard81273902003-09-30 00:43:48 +000013496#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013497 if (options & XML_PARSE_SAX1) {
13498 ctxt->sax->startElement = xmlSAX2StartElement;
13499 ctxt->sax->endElement = xmlSAX2EndElement;
13500 ctxt->sax->startElementNs = NULL;
13501 ctxt->sax->endElementNs = NULL;
13502 ctxt->sax->initialized = 1;
13503 options -= XML_PARSE_SAX1;
13504 }
Daniel Veillard81273902003-09-30 00:43:48 +000013505#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013506 if (options & XML_PARSE_NODICT) {
13507 ctxt->dictNames = 0;
13508 options -= XML_PARSE_NODICT;
13509 } else {
13510 ctxt->dictNames = 1;
13511 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013512 if (options & XML_PARSE_NOCDATA) {
13513 ctxt->sax->cdataBlock = NULL;
13514 options -= XML_PARSE_NOCDATA;
13515 }
13516 if (options & XML_PARSE_NSCLEAN) {
13517 ctxt->options |= XML_PARSE_NSCLEAN;
13518 options -= XML_PARSE_NSCLEAN;
13519 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013520 if (options & XML_PARSE_NONET) {
13521 ctxt->options |= XML_PARSE_NONET;
13522 options -= XML_PARSE_NONET;
13523 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013524 if (options & XML_PARSE_COMPACT) {
13525 ctxt->options |= XML_PARSE_COMPACT;
13526 options -= XML_PARSE_COMPACT;
13527 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013528 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013529 return (options);
13530}
13531
13532/**
13533 * xmlDoRead:
13534 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013535 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013536 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013537 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013538 * @reuse: keep the context for reuse
13539 *
13540 * Common front-end for the xmlRead functions
13541 *
13542 * Returns the resulting document tree or NULL
13543 */
13544static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013545xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13546 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013547{
13548 xmlDocPtr ret;
13549
13550 xmlCtxtUseOptions(ctxt, options);
13551 if (encoding != NULL) {
13552 xmlCharEncodingHandlerPtr hdlr;
13553
13554 hdlr = xmlFindCharEncodingHandler(encoding);
13555 if (hdlr != NULL)
13556 xmlSwitchToEncoding(ctxt, hdlr);
13557 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013558 if ((URL != NULL) && (ctxt->input != NULL) &&
13559 (ctxt->input->filename == NULL))
13560 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013561 xmlParseDocument(ctxt);
13562 if ((ctxt->wellFormed) || ctxt->recovery)
13563 ret = ctxt->myDoc;
13564 else {
13565 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013566 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013567 xmlFreeDoc(ctxt->myDoc);
13568 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013569 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013570 ctxt->myDoc = NULL;
13571 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013572 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013573 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013574
13575 return (ret);
13576}
13577
13578/**
13579 * xmlReadDoc:
13580 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013581 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013582 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013583 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013584 *
13585 * parse an XML in-memory document and build a tree.
13586 *
13587 * Returns the resulting document tree
13588 */
13589xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013590xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013591{
13592 xmlParserCtxtPtr ctxt;
13593
13594 if (cur == NULL)
13595 return (NULL);
13596
13597 ctxt = xmlCreateDocParserCtxt(cur);
13598 if (ctxt == NULL)
13599 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013600 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013601}
13602
13603/**
13604 * xmlReadFile:
13605 * @filename: a file or URL
13606 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013607 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013608 *
13609 * parse an XML file from the filesystem or the network.
13610 *
13611 * Returns the resulting document tree
13612 */
13613xmlDocPtr
13614xmlReadFile(const char *filename, const char *encoding, int options)
13615{
13616 xmlParserCtxtPtr ctxt;
13617
Daniel Veillard61b93382003-11-03 14:28:31 +000013618 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013619 if (ctxt == NULL)
13620 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013621 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013622}
13623
13624/**
13625 * xmlReadMemory:
13626 * @buffer: a pointer to a char array
13627 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013628 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013629 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013630 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013631 *
13632 * parse an XML in-memory document and build a tree.
13633 *
13634 * Returns the resulting document tree
13635 */
13636xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013637xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013638{
13639 xmlParserCtxtPtr ctxt;
13640
13641 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13642 if (ctxt == NULL)
13643 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013644 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013645}
13646
13647/**
13648 * xmlReadFd:
13649 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013650 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013651 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013652 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013653 *
13654 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013655 * NOTE that the file descriptor will not be closed when the
13656 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013657 *
13658 * Returns the resulting document tree
13659 */
13660xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013661xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013662{
13663 xmlParserCtxtPtr ctxt;
13664 xmlParserInputBufferPtr input;
13665 xmlParserInputPtr stream;
13666
13667 if (fd < 0)
13668 return (NULL);
13669
13670 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13671 if (input == NULL)
13672 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013673 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013674 ctxt = xmlNewParserCtxt();
13675 if (ctxt == NULL) {
13676 xmlFreeParserInputBuffer(input);
13677 return (NULL);
13678 }
13679 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13680 if (stream == NULL) {
13681 xmlFreeParserInputBuffer(input);
13682 xmlFreeParserCtxt(ctxt);
13683 return (NULL);
13684 }
13685 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013686 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013687}
13688
13689/**
13690 * xmlReadIO:
13691 * @ioread: an I/O read function
13692 * @ioclose: an I/O close function
13693 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013694 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013695 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013696 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013697 *
13698 * parse an XML document from I/O functions and source and build a tree.
13699 *
13700 * Returns the resulting document tree
13701 */
13702xmlDocPtr
13703xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013704 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013705{
13706 xmlParserCtxtPtr ctxt;
13707 xmlParserInputBufferPtr input;
13708 xmlParserInputPtr stream;
13709
13710 if (ioread == NULL)
13711 return (NULL);
13712
13713 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13714 XML_CHAR_ENCODING_NONE);
13715 if (input == NULL)
13716 return (NULL);
13717 ctxt = xmlNewParserCtxt();
13718 if (ctxt == NULL) {
13719 xmlFreeParserInputBuffer(input);
13720 return (NULL);
13721 }
13722 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13723 if (stream == NULL) {
13724 xmlFreeParserInputBuffer(input);
13725 xmlFreeParserCtxt(ctxt);
13726 return (NULL);
13727 }
13728 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013729 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013730}
13731
13732/**
13733 * xmlCtxtReadDoc:
13734 * @ctxt: an XML parser context
13735 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013736 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013737 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013738 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013739 *
13740 * parse an XML in-memory document and build a tree.
13741 * This reuses the existing @ctxt parser context
13742 *
13743 * Returns the resulting document tree
13744 */
13745xmlDocPtr
13746xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013747 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013748{
13749 xmlParserInputPtr stream;
13750
13751 if (cur == NULL)
13752 return (NULL);
13753 if (ctxt == NULL)
13754 return (NULL);
13755
13756 xmlCtxtReset(ctxt);
13757
13758 stream = xmlNewStringInputStream(ctxt, cur);
13759 if (stream == NULL) {
13760 return (NULL);
13761 }
13762 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013763 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013764}
13765
13766/**
13767 * xmlCtxtReadFile:
13768 * @ctxt: an XML parser context
13769 * @filename: a file or URL
13770 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013771 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013772 *
13773 * parse an XML file from the filesystem or the network.
13774 * This reuses the existing @ctxt parser context
13775 *
13776 * Returns the resulting document tree
13777 */
13778xmlDocPtr
13779xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13780 const char *encoding, int options)
13781{
13782 xmlParserInputPtr stream;
13783
13784 if (filename == NULL)
13785 return (NULL);
13786 if (ctxt == NULL)
13787 return (NULL);
13788
13789 xmlCtxtReset(ctxt);
13790
Daniel Veillard29614c72004-11-26 10:47:26 +000013791 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013792 if (stream == NULL) {
13793 return (NULL);
13794 }
13795 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013796 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013797}
13798
13799/**
13800 * xmlCtxtReadMemory:
13801 * @ctxt: an XML parser context
13802 * @buffer: a pointer to a char array
13803 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013804 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013805 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013806 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013807 *
13808 * parse an XML in-memory document and build a tree.
13809 * This reuses the existing @ctxt parser context
13810 *
13811 * Returns the resulting document tree
13812 */
13813xmlDocPtr
13814xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013815 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013816{
13817 xmlParserInputBufferPtr input;
13818 xmlParserInputPtr stream;
13819
13820 if (ctxt == NULL)
13821 return (NULL);
13822 if (buffer == NULL)
13823 return (NULL);
13824
13825 xmlCtxtReset(ctxt);
13826
13827 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13828 if (input == NULL) {
13829 return(NULL);
13830 }
13831
13832 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13833 if (stream == NULL) {
13834 xmlFreeParserInputBuffer(input);
13835 return(NULL);
13836 }
13837
13838 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013839 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013840}
13841
13842/**
13843 * xmlCtxtReadFd:
13844 * @ctxt: an XML parser context
13845 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013846 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013847 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013848 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013849 *
13850 * parse an XML from a file descriptor and build a tree.
13851 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013852 * NOTE that the file descriptor will not be closed when the
13853 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013854 *
13855 * Returns the resulting document tree
13856 */
13857xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013858xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13859 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013860{
13861 xmlParserInputBufferPtr input;
13862 xmlParserInputPtr stream;
13863
13864 if (fd < 0)
13865 return (NULL);
13866 if (ctxt == NULL)
13867 return (NULL);
13868
13869 xmlCtxtReset(ctxt);
13870
13871
13872 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13873 if (input == NULL)
13874 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013875 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013876 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13877 if (stream == NULL) {
13878 xmlFreeParserInputBuffer(input);
13879 return (NULL);
13880 }
13881 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013882 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013883}
13884
13885/**
13886 * xmlCtxtReadIO:
13887 * @ctxt: an XML parser context
13888 * @ioread: an I/O read function
13889 * @ioclose: an I/O close function
13890 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013891 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013892 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013893 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013894 *
13895 * parse an XML document from I/O functions and source and build a tree.
13896 * This reuses the existing @ctxt parser context
13897 *
13898 * Returns the resulting document tree
13899 */
13900xmlDocPtr
13901xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13902 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013903 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013904 const char *encoding, int options)
13905{
13906 xmlParserInputBufferPtr input;
13907 xmlParserInputPtr stream;
13908
13909 if (ioread == NULL)
13910 return (NULL);
13911 if (ctxt == NULL)
13912 return (NULL);
13913
13914 xmlCtxtReset(ctxt);
13915
13916 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13917 XML_CHAR_ENCODING_NONE);
13918 if (input == NULL)
13919 return (NULL);
13920 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13921 if (stream == NULL) {
13922 xmlFreeParserInputBuffer(input);
13923 return (NULL);
13924 }
13925 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013926 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013927}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013928
13929#define bottom_parser
13930#include "elfgcchack.h"