blob: 80e6b6954be2206f5e1919ccbcabdcec8a684470 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000083/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000084 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000085 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
Owen Taylor3473f882001-02-23 17:55:21 +000099/*
Owen Taylor3473f882001-02-23 17:55:21 +0000100 * List of XML prefixed PI allowed by W3C specs
101 */
102
Daniel Veillardb44025c2001-10-11 22:55:55 +0000103static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000104 "xml-stylesheet",
105 NULL
106};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
619/**
620 * xmlHasFeature:
621 * @feature: the feature to be examined
622 *
623 * Examines if the library has been compiled with a given feature.
624 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
628 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
/************************************************************************
 *									*
 *		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
981 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000982 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000983 if (ctxt->attsDefault == NULL)
984 goto mem_error;
985 }
986
987 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000988 * split the element name into prefix:localname , the string found
989 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000990 */
991 name = xmlSplitQName3(fullname, &len);
992 if (name == NULL) {
993 name = xmlDictLookup(ctxt->dict, fullname, -1);
994 prefix = NULL;
995 } else {
996 name = xmlDictLookup(ctxt->dict, name, -1);
997 prefix = xmlDictLookup(ctxt->dict, fullname, len);
998 }
999
1000 /*
1001 * make sure there is some storage
1002 */
1003 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1004 if (defaults == NULL) {
1005 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001006 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001007 if (defaults == NULL)
1008 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001009 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001010 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001011 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1012 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001013 xmlDefAttrsPtr temp;
1014
1015 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001016 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001017 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001018 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001019 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001020 defaults->maxAttrs *= 2;
1021 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1022 }
1023
1024 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001025 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001026 * are within the DTD and hen not associated to namespace names.
1027 */
1028 name = xmlSplitQName3(fullattr, &len);
1029 if (name == NULL) {
1030 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1031 prefix = NULL;
1032 } else {
1033 name = xmlDictLookup(ctxt->dict, name, -1);
1034 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1035 }
1036
1037 defaults->values[4 * defaults->nbAttrs] = name;
1038 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1039 /* intern the string and precompute the end */
1040 len = xmlStrlen(value);
1041 value = xmlDictLookup(ctxt->dict, value, len);
1042 defaults->values[4 * defaults->nbAttrs + 2] = value;
1043 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1044 defaults->nbAttrs++;
1045
1046 return;
1047
1048mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001049 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050 return;
1051}
1052
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001053/**
1054 * xmlAddSpecialAttr:
1055 * @ctxt: an XML parser context
1056 * @fullname: the element fullname
1057 * @fullattr: the attribute fullname
1058 * @type: the attribute type
1059 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001060 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001061 */
1062static void
1063xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1064 const xmlChar *fullname,
1065 const xmlChar *fullattr,
1066 int type)
1067{
1068 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001069 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001070 if (ctxt->attsSpecial == NULL)
1071 goto mem_error;
1072 }
1073
Daniel Veillardac4118d2008-01-11 05:27:32 +00001074 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1075 return;
1076
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001077 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1078 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001079 return;
1080
1081mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001082 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001083 return;
1084}
1085
Daniel Veillard4432df22003-09-28 18:58:27 +00001086/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001087 * xmlCleanSpecialAttrCallback:
1088 *
1089 * Removes CDATA attributes from the special attribute table
1090 */
1091static void
1092xmlCleanSpecialAttrCallback(void *payload, void *data,
1093 const xmlChar *fullname, const xmlChar *fullattr,
1094 const xmlChar *unused ATTRIBUTE_UNUSED) {
1095 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1096
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001097 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001098 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1099 }
1100}
1101
1102/**
1103 * xmlCleanSpecialAttr:
1104 * @ctxt: an XML parser context
1105 *
1106 * Trim the list of attributes defined to remove all those of type
1107 * CDATA as they are not special. This call should be done when finishing
1108 * to parse the DTD and before starting to parse the document root.
1109 */
1110static void
1111xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1112{
1113 if (ctxt->attsSpecial == NULL)
1114 return;
1115
1116 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1117
1118 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1119 xmlHashFree(ctxt->attsSpecial, NULL);
1120 ctxt->attsSpecial = NULL;
1121 }
1122 return;
1123}
1124
1125/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001126 * xmlCheckLanguageID:
1127 * @lang: pointer to the string value
1128 *
1129 * Checks that the value conforms to the LanguageID production:
1130 *
1131 * NOTE: this is somewhat deprecated, those productions were removed from
1132 * the XML Second edition.
1133 *
1134 * [33] LanguageID ::= Langcode ('-' Subcode)*
1135 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1136 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1137 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1138 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1139 * [38] Subcode ::= ([a-z] | [A-Z])+
1140 *
1141 * Returns 1 if correct 0 otherwise
1142 **/
1143int
1144xmlCheckLanguageID(const xmlChar * lang)
1145{
1146 const xmlChar *cur = lang;
1147
1148 if (cur == NULL)
1149 return (0);
1150 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1151 ((cur[0] == 'I') && (cur[1] == '-'))) {
1152 /*
1153 * IANA code
1154 */
1155 cur += 2;
1156 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1157 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1158 cur++;
1159 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1160 ((cur[0] == 'X') && (cur[1] == '-'))) {
1161 /*
1162 * User code
1163 */
1164 cur += 2;
1165 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1166 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1167 cur++;
1168 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1169 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1170 /*
1171 * ISO639
1172 */
1173 cur++;
1174 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1175 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1176 cur++;
1177 else
1178 return (0);
1179 } else
1180 return (0);
1181 while (cur[0] != 0) { /* non input consuming */
1182 if (cur[0] != '-')
1183 return (0);
1184 cur++;
1185 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1186 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1187 cur++;
1188 else
1189 return (0);
1190 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1191 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1192 cur++;
1193 }
1194 return (1);
1195}
1196
/************************************************************************
 *									*
 *		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/
1202
1203xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1204 const xmlChar ** str);
1205
Daniel Veillard0fb18932003-09-07 09:14:37 +00001206#ifdef SAX2
1207/**
1208 * nsPush:
1209 * @ctxt: an XML parser context
1210 * @prefix: the namespace prefix or NULL
1211 * @URL: the namespace name
1212 *
1213 * Pushes a new parser namespace on top of the ns stack
1214 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001215 * Returns -1 in case of error, -2 if the namespace should be discarded
1216 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001217 */
1218static int
1219nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1220{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001221 if (ctxt->options & XML_PARSE_NSCLEAN) {
1222 int i;
1223 for (i = 0;i < ctxt->nsNr;i += 2) {
1224 if (ctxt->nsTab[i] == prefix) {
1225 /* in scope */
1226 if (ctxt->nsTab[i + 1] == URL)
1227 return(-2);
1228 /* out of scope keep it */
1229 break;
1230 }
1231 }
1232 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001233 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1234 ctxt->nsMax = 10;
1235 ctxt->nsNr = 0;
1236 ctxt->nsTab = (const xmlChar **)
1237 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1238 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001239 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001240 ctxt->nsMax = 0;
1241 return (-1);
1242 }
1243 } else if (ctxt->nsNr >= ctxt->nsMax) {
1244 ctxt->nsMax *= 2;
1245 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001246 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001247 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1248 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001249 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001250 ctxt->nsMax /= 2;
1251 return (-1);
1252 }
1253 }
1254 ctxt->nsTab[ctxt->nsNr++] = prefix;
1255 ctxt->nsTab[ctxt->nsNr++] = URL;
1256 return (ctxt->nsNr);
1257}
1258/**
1259 * nsPop:
1260 * @ctxt: an XML parser context
1261 * @nr: the number to pop
1262 *
1263 * Pops the top @nr parser prefix/namespace from the ns stack
1264 *
1265 * Returns the number of namespaces removed
1266 */
1267static int
1268nsPop(xmlParserCtxtPtr ctxt, int nr)
1269{
1270 int i;
1271
1272 if (ctxt->nsTab == NULL) return(0);
1273 if (ctxt->nsNr < nr) {
1274 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1275 nr = ctxt->nsNr;
1276 }
1277 if (ctxt->nsNr <= 0)
1278 return (0);
1279
1280 for (i = 0;i < nr;i++) {
1281 ctxt->nsNr--;
1282 ctxt->nsTab[ctxt->nsNr] = NULL;
1283 }
1284 return(nr);
1285}
1286#endif
1287
1288static int
1289xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1290 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001291 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001292 int maxatts;
1293
1294 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001295 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001296 atts = (const xmlChar **)
1297 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001298 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001299 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1301 if (attallocs == NULL) goto mem_error;
1302 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001303 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001304 } else if (nr + 5 > ctxt->maxatts) {
1305 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001306 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1307 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001309 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001310 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1311 (maxatts / 5) * sizeof(int));
1312 if (attallocs == NULL) goto mem_error;
1313 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001314 ctxt->maxatts = maxatts;
1315 }
1316 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001317mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001318 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001319 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001320}
1321
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001322/**
1323 * inputPush:
1324 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001325 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001326 *
1327 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001328 *
1329 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001330 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001331int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001332inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1333{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001334 if ((ctxt == NULL) || (value == NULL))
1335 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001336 if (ctxt->inputNr >= ctxt->inputMax) {
1337 ctxt->inputMax *= 2;
1338 ctxt->inputTab =
1339 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1340 ctxt->inputMax *
1341 sizeof(ctxt->inputTab[0]));
1342 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001343 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001344 return (0);
1345 }
1346 }
1347 ctxt->inputTab[ctxt->inputNr] = value;
1348 ctxt->input = value;
1349 return (ctxt->inputNr++);
1350}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001351/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001352 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001353 * @ctxt: an XML parser context
1354 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001356 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001358 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001359xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001360inputPop(xmlParserCtxtPtr ctxt)
1361{
1362 xmlParserInputPtr ret;
1363
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001364 if (ctxt == NULL)
1365 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001366 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001367 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001368 ctxt->inputNr--;
1369 if (ctxt->inputNr > 0)
1370 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1371 else
1372 ctxt->input = NULL;
1373 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001374 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001375 return (ret);
1376}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001377/**
1378 * nodePush:
1379 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001380 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001381 *
1382 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001383 *
1384 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001385 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001386int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1388{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001389 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001390 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001391 xmlNodePtr *tmp;
1392
1393 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1394 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001395 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001396 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001397 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001398 return (0);
1399 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001400 ctxt->nodeTab = tmp;
1401 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001402 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001403 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001404 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001405 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1406 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001407 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001408 return(0);
1409 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001410 ctxt->nodeTab[ctxt->nodeNr] = value;
1411 ctxt->node = value;
1412 return (ctxt->nodeNr++);
1413}
1414/**
1415 * nodePop:
1416 * @ctxt: an XML parser context
1417 *
1418 * Pops the top element node from the node stack
1419 *
1420 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001421 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001422xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001423nodePop(xmlParserCtxtPtr ctxt)
1424{
1425 xmlNodePtr ret;
1426
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001427 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001428 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001429 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001430 ctxt->nodeNr--;
1431 if (ctxt->nodeNr > 0)
1432 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1433 else
1434 ctxt->node = NULL;
1435 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001436 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001437 return (ret);
1438}
Daniel Veillarda2351322004-06-27 12:08:10 +00001439
1440#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001441/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001442 * nameNsPush:
1443 * @ctxt: an XML parser context
1444 * @value: the element name
1445 * @prefix: the element prefix
1446 * @URI: the element namespace name
1447 *
1448 * Pushes a new element name/prefix/URL on top of the name stack
1449 *
1450 * Returns -1 in case of error, the index in the stack otherwise
1451 */
1452static int
1453nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1454 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1455{
1456 if (ctxt->nameNr >= ctxt->nameMax) {
1457 const xmlChar * *tmp;
1458 void **tmp2;
1459 ctxt->nameMax *= 2;
1460 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1461 ctxt->nameMax *
1462 sizeof(ctxt->nameTab[0]));
1463 if (tmp == NULL) {
1464 ctxt->nameMax /= 2;
1465 goto mem_error;
1466 }
1467 ctxt->nameTab = tmp;
1468 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1469 ctxt->nameMax * 3 *
1470 sizeof(ctxt->pushTab[0]));
1471 if (tmp2 == NULL) {
1472 ctxt->nameMax /= 2;
1473 goto mem_error;
1474 }
1475 ctxt->pushTab = tmp2;
1476 }
1477 ctxt->nameTab[ctxt->nameNr] = value;
1478 ctxt->name = value;
1479 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1480 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001481 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001482 return (ctxt->nameNr++);
1483mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001484 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001485 return (-1);
1486}
1487/**
1488 * nameNsPop:
1489 * @ctxt: an XML parser context
1490 *
1491 * Pops the top element/prefix/URI name from the name stack
1492 *
1493 * Returns the name just removed
1494 */
1495static const xmlChar *
1496nameNsPop(xmlParserCtxtPtr ctxt)
1497{
1498 const xmlChar *ret;
1499
1500 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001501 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001502 ctxt->nameNr--;
1503 if (ctxt->nameNr > 0)
1504 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1505 else
1506 ctxt->name = NULL;
1507 ret = ctxt->nameTab[ctxt->nameNr];
1508 ctxt->nameTab[ctxt->nameNr] = NULL;
1509 return (ret);
1510}
Daniel Veillarda2351322004-06-27 12:08:10 +00001511#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001512
1513/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 * namePush:
1515 * @ctxt: an XML parser context
1516 * @value: the element name
1517 *
1518 * Pushes a new element name on top of the name stack
1519 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001520 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001521 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001522int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001523namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001524{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001525 if (ctxt == NULL) return (-1);
1526
Daniel Veillard1c732d22002-11-30 11:22:59 +00001527 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001528 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001529 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001530 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001531 ctxt->nameMax *
1532 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001533 if (tmp == NULL) {
1534 ctxt->nameMax /= 2;
1535 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001536 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001537 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 }
1539 ctxt->nameTab[ctxt->nameNr] = value;
1540 ctxt->name = value;
1541 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001542mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001544 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545}
1546/**
1547 * namePop:
1548 * @ctxt: an XML parser context
1549 *
1550 * Pops the top element name from the name stack
1551 *
1552 * Returns the name just removed
1553 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001554const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001555namePop(xmlParserCtxtPtr ctxt)
1556{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001557 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001558
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001559 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1560 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001561 ctxt->nameNr--;
1562 if (ctxt->nameNr > 0)
1563 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1564 else
1565 ctxt->name = NULL;
1566 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001567 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001568 return (ret);
1569}
Owen Taylor3473f882001-02-23 17:55:21 +00001570
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001571static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001572 if (ctxt->spaceNr >= ctxt->spaceMax) {
1573 ctxt->spaceMax *= 2;
1574 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1575 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1576 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001577 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001578 return(0);
1579 }
1580 }
1581 ctxt->spaceTab[ctxt->spaceNr] = val;
1582 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1583 return(ctxt->spaceNr++);
1584}
1585
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001586static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001587 int ret;
1588 if (ctxt->spaceNr <= 0) return(0);
1589 ctxt->spaceNr--;
1590 if (ctxt->spaceNr > 0)
1591 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1592 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001593 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001594 ret = ctxt->spaceTab[ctxt->spaceNr];
1595 ctxt->spaceTab[ctxt->spaceNr] = -1;
1596 return(ret);
1597}
1598
1599/*
1600 * Macros for accessing the content. Those should be used only by the parser,
1601 * and not exported.
1602 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 *                    use them
1605 *
1606 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1607 * To be used with extreme caution since operations consuming
1608 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1613 * RAW same as CUR but in the input buffer, bypass any token
1614 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001621 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1622 *
1623 * NEXT Skip to the next character, this does the proper decoding
1624 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001625 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001626 * CUR_CHAR(l) returns the current unicode character (int), set l
1627 * to the number of xmlChars used for the encoding [0-5].
1628 * CUR_SCHAR same but operate on a string instead of the context
1629 * COPY_BUF copy the current unicode char to the target buffer, increment
1630 * the index
1631 * GROW, SHRINK handling of input buffers
1632 */
1633
/*
 * All four macros expect a variable named ctxt to be in scope.
 * RAW and CUR expand to the same expression: the byte under the cursor.
 */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* byte located (val) positions after the cursor, no bounds checking */
#define NXT(val) (ctxt->input->cur[(val)])
/* the cursor itself; still usable as an lvalue */
#define CUR_PTR (ctxt->input->cur)
1638
/*
 * CMPn(s, c1, ..., cn): true when the n first bytes found at s are equal
 * to c1 ... cn, compared as unsigned char values and tested left to
 * right (the comparison stops at the first mismatch).
 *
 * Every argument is parenthesized so that expressions such as "p + 1"
 * are converted to a byte pointer as a whole. s is expanded once per
 * compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( (s), c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( (s), c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( (s), c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( (s), c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( (s), c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( (s), c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1656
/*
 * SKIP(val): advance the cursor, the column and the character counter by
 * val, then run the usual end-of-step checks: handle a PE reference if
 * the cursor lands on '%', and pop the input if it lands on a 0 byte and
 * the input cannot be grown.
 *
 * val is evaluated exactly once, so an argument with side effects or one
 * depending on the cursor position behaves as expected.
 */
#define SKIP(val) do {							\
    const int xmlSkipCount_ = (val);					\
    ctxt->nbChars += xmlSkipCount_;					\
    ctxt->input->cur += xmlSkipCount_;					\
    ctxt->input->col += xmlSkipCount_;					\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1664
/*
 * SKIPL(val): like SKIP(val) but walks the val bytes one by one so that
 * newlines found on the way update the line and column counters.
 *
 * val is parenthesized and evaluated exactly once before the loop, so
 * the number of skipped bytes cannot be altered by operator precedence
 * or by the cursor moving while the loop runs.
 */
#define SKIPL(val) do {							\
    const int xmlSkipCount_ = (val);					\
    int skipl;								\
    for(skipl=0; skipl<xmlSkipCount_; skipl++) {			\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1679
Daniel Veillarda880b122003-04-21 21:36:41 +00001680#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001681 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1682 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001683 xmlSHRINK (ctxt);
1684
1685static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1686 xmlParserInputShrink(ctxt->input);
1687 if ((*ctxt->input->cur == 0) &&
1688 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1689 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001690 }
Owen Taylor3473f882001-02-23 17:55:21 +00001691
Daniel Veillarda880b122003-04-21 21:36:41 +00001692#define GROW if ((ctxt->progressive == 0) && \
1693 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001694 xmlGROW (ctxt);
1695
1696static void xmlGROW (xmlParserCtxtPtr ctxt) {
1697 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1698 if ((*ctxt->input->cur == 0) &&
1699 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1700 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001701}
Owen Taylor3473f882001-02-23 17:55:21 +00001702
/* skip the blanks found at the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, updating the column and character
 * counters, and try to grow the input when the cursor lands on a 0 byte.
 * Line counting is not handled here.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (ctxt->input->cur[0] == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1714
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A newline under the cursor bumps the line counter and resets the
 * column to 1, anything else advances the column by one. A PE reference
 * is handled if the cursor lands on '%'.
 */
#define NEXTL(l) do {							\
    if (ctxt->input->cur[0] != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += (l);						\
    if (ctxt->input->cur[0] == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)
1722
/* current character of the input; l receives its length in bytes */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. A character of length l == 1 is stored directly as a single
 * xmlChar, any other one goes through xmlCopyCharMultiByte().
 *
 * The arguments are parenthesized and the body is wrapped in
 * do { } while (0) so the macro behaves as a single statement.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001729
1730/**
1731 * xmlSkipBlankChars:
1732 * @ctxt: the XML parser context
1733 *
1734 * skip all blanks character found at that point in the input streams.
1735 * It pops up finished entities in the process if allowable at that point.
1736 *
1737 * Returns the number of space chars skipped
1738 */
1739
1740int
1741xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001742 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001743
1744 /*
1745 * It's Okay to use CUR/NEXT here since all the blanks are on
1746 * the ASCII range.
1747 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001748 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1749 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001750 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001751 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001752 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001753 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001754 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001755 if (*cur == '\n') {
1756 ctxt->input->line++; ctxt->input->col = 1;
1757 }
1758 cur++;
1759 res++;
1760 if (*cur == 0) {
1761 ctxt->input->cur = cur;
1762 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1763 cur = ctxt->input->cur;
1764 }
1765 }
1766 ctxt->input->cur = cur;
1767 } else {
1768 int cur;
1769 do {
1770 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001771 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001772 NEXT;
1773 cur = CUR;
1774 res++;
1775 }
1776 while ((cur == 0) && (ctxt->inputNr > 1) &&
1777 (ctxt->instate != XML_PARSER_COMMENT)) {
1778 xmlPopInput(ctxt);
1779 cur = CUR;
1780 }
1781 /*
1782 * Need to handle support of entities branching here
1783 */
1784 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1785 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1786 }
Owen Taylor3473f882001-02-23 17:55:21 +00001787 return(res);
1788}
1789
1790/************************************************************************
1791 * *
1792 * Commodity functions to handle entities *
1793 * *
1794 ************************************************************************/
1795
1796/**
1797 * xmlPopInput:
1798 * @ctxt: an XML parser context
1799 *
1800 * xmlPopInput: the current input pointed by ctxt->input came to an end
1801 * pop it and return the next char.
1802 *
1803 * Returns the current xmlChar in the parser context
1804 */
1805xmlChar
1806xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001807 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001808 if (xmlParserDebugEntities)
1809 xmlGenericError(xmlGenericErrorContext,
1810 "Popping input %d\n", ctxt->inputNr);
1811 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001812 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001813 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1814 return(xmlPopInput(ctxt));
1815 return(CUR);
1816}
1817
1818/**
1819 * xmlPushInput:
1820 * @ctxt: an XML parser context
1821 * @input: an XML parser input fragment (entity, XML fragment ...).
1822 *
1823 * xmlPushInput: switch to a new input stream which is stacked on top
1824 * of the previous one(s).
1825 */
1826void
1827xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1828 if (input == NULL) return;
1829
1830 if (xmlParserDebugEntities) {
1831 if ((ctxt->input != NULL) && (ctxt->input->filename))
1832 xmlGenericError(xmlGenericErrorContext,
1833 "%s(%d): ", ctxt->input->filename,
1834 ctxt->input->line);
1835 xmlGenericError(xmlGenericErrorContext,
1836 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1837 }
1838 inputPush(ctxt, input);
1839 GROW;
1840}
1841
1842/**
1843 * xmlParseCharRef:
1844 * @ctxt: an XML parser context
1845 *
1846 * parse Reference declarations
1847 *
1848 * [66] CharRef ::= '&#' [0-9]+ ';' |
1849 * '&#x' [0-9a-fA-F]+ ';'
1850 *
1851 * [ WFC: Legal Character ]
1852 * Characters referred to using character references must match the
1853 * production for Char.
1854 *
1855 * Returns the value parsed (as an int), 0 in case of error
1856 */
1857int
1858xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001859 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001860 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001861 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001862
Owen Taylor3473f882001-02-23 17:55:21 +00001863 /*
1864 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1865 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001866 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001867 (NXT(2) == 'x')) {
1868 SKIP(3);
1869 GROW;
1870 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001871 if (count++ > 20) {
1872 count = 0;
1873 GROW;
1874 }
1875 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001876 val = val * 16 + (CUR - '0');
1877 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1878 val = val * 16 + (CUR - 'a') + 10;
1879 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1880 val = val * 16 + (CUR - 'A') + 10;
1881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001882 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001883 val = 0;
1884 break;
1885 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001886 if (val > 0x10FFFF)
1887 outofrange = val;
1888
Owen Taylor3473f882001-02-23 17:55:21 +00001889 NEXT;
1890 count++;
1891 }
1892 if (RAW == ';') {
1893 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001894 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001895 ctxt->nbChars ++;
1896 ctxt->input->cur++;
1897 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001898 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001899 SKIP(2);
1900 GROW;
1901 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001902 if (count++ > 20) {
1903 count = 0;
1904 GROW;
1905 }
1906 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001907 val = val * 10 + (CUR - '0');
1908 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001909 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001910 val = 0;
1911 break;
1912 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001913 if (val > 0x10FFFF)
1914 outofrange = val;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 NEXT;
1917 count++;
1918 }
1919 if (RAW == ';') {
1920 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001921 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001922 ctxt->nbChars ++;
1923 ctxt->input->cur++;
1924 }
1925 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001927 }
1928
1929 /*
1930 * [ WFC: Legal Character ]
1931 * Characters referred to using character references must match the
1932 * production for Char.
1933 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001934 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001935 return(val);
1936 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001937 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1938 "xmlParseCharRef: invalid xmlChar value %d\n",
1939 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001940 }
1941 return(0);
1942}
1943
1944/**
1945 * xmlParseStringCharRef:
1946 * @ctxt: an XML parser context
1947 * @str: a pointer to an index in the string
1948 *
1949 * parse Reference declarations, variant parsing from a string rather
1950 * than an an input flow.
1951 *
1952 * [66] CharRef ::= '&#' [0-9]+ ';' |
1953 * '&#x' [0-9a-fA-F]+ ';'
1954 *
1955 * [ WFC: Legal Character ]
1956 * Characters referred to using character references must match the
1957 * production for Char.
1958 *
1959 * Returns the value parsed (as an int), 0 in case of error, str will be
1960 * updated to the current value of the index
1961 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001962static int
Owen Taylor3473f882001-02-23 17:55:21 +00001963xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1964 const xmlChar *ptr;
1965 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001966 unsigned int val = 0;
1967 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001968
1969 if ((str == NULL) || (*str == NULL)) return(0);
1970 ptr = *str;
1971 cur = *ptr;
1972 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1973 ptr += 3;
1974 cur = *ptr;
1975 while (cur != ';') { /* Non input consuming loop */
1976 if ((cur >= '0') && (cur <= '9'))
1977 val = val * 16 + (cur - '0');
1978 else if ((cur >= 'a') && (cur <= 'f'))
1979 val = val * 16 + (cur - 'a') + 10;
1980 else if ((cur >= 'A') && (cur <= 'F'))
1981 val = val * 16 + (cur - 'A') + 10;
1982 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001983 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001984 val = 0;
1985 break;
1986 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001987 if (val > 0x10FFFF)
1988 outofrange = val;
1989
Owen Taylor3473f882001-02-23 17:55:21 +00001990 ptr++;
1991 cur = *ptr;
1992 }
1993 if (cur == ';')
1994 ptr++;
1995 } else if ((cur == '&') && (ptr[1] == '#')){
1996 ptr += 2;
1997 cur = *ptr;
1998 while (cur != ';') { /* Non input consuming loops */
1999 if ((cur >= '0') && (cur <= '9'))
2000 val = val * 10 + (cur - '0');
2001 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002002 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003 val = 0;
2004 break;
2005 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002006 if (val > 0x10FFFF)
2007 outofrange = val;
2008
Owen Taylor3473f882001-02-23 17:55:21 +00002009 ptr++;
2010 cur = *ptr;
2011 }
2012 if (cur == ';')
2013 ptr++;
2014 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002015 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002016 return(0);
2017 }
2018 *str = ptr;
2019
2020 /*
2021 * [ WFC: Legal Character ]
2022 * Characters referred to using character references must match the
2023 * production for Char.
2024 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002025 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002026 return(val);
2027 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002028 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2029 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2030 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 }
2032 return(0);
2033}
2034
2035/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002036 * xmlNewBlanksWrapperInputStream:
2037 * @ctxt: an XML parser context
2038 * @entity: an Entity pointer
2039 *
2040 * Create a new input stream for wrapping
2041 * blanks around a PEReference
2042 *
2043 * Returns the new input stream or NULL
2044 */
2045
2046static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2047
Daniel Veillardf4862f02002-09-10 11:13:43 +00002048static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2050 xmlParserInputPtr input;
2051 xmlChar *buffer;
2052 size_t length;
2053 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002054 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2055 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002056 return(NULL);
2057 }
2058 if (xmlParserDebugEntities)
2059 xmlGenericError(xmlGenericErrorContext,
2060 "new blanks wrapper for entity: %s\n", entity->name);
2061 input = xmlNewInputStream(ctxt);
2062 if (input == NULL) {
2063 return(NULL);
2064 }
2065 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002066 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002067 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002068 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002069 return(NULL);
2070 }
2071 buffer [0] = ' ';
2072 buffer [1] = '%';
2073 buffer [length-3] = ';';
2074 buffer [length-2] = ' ';
2075 buffer [length-1] = 0;
2076 memcpy(buffer + 2, entity->name, length - 5);
2077 input->free = deallocblankswrapper;
2078 input->base = buffer;
2079 input->cur = buffer;
2080 input->length = length;
2081 input->end = &buffer[length];
2082 return(input);
2083}
2084
2085/**
Owen Taylor3473f882001-02-23 17:55:21 +00002086 * xmlParserHandlePEReference:
2087 * @ctxt: the parser context
2088 *
2089 * [69] PEReference ::= '%' Name ';'
2090 *
2091 * [ WFC: No Recursion ]
2092 * A parsed entity must not contain a recursive
2093 * reference to itself, either directly or indirectly.
2094 *
2095 * [ WFC: Entity Declared ]
2096 * In a document without any DTD, a document with only an internal DTD
2097 * subset which contains no parameter entity references, or a document
2098 * with "standalone='yes'", ... ... The declaration of a parameter
2099 * entity must precede any reference to it...
2100 *
2101 * [ VC: Entity Declared ]
2102 * In a document with an external subset or external parameter entities
2103 * with "standalone='no'", ... ... The declaration of a parameter entity
2104 * must precede any reference to it...
2105 *
2106 * [ WFC: In DTD ]
2107 * Parameter-entity references may only appear in the DTD.
2108 * NOTE: misleading but this is handled.
2109 *
2110 * A PEReference may have been detected in the current input stream
2111 * the handling is done accordingly to
2112 * http://www.w3.org/TR/REC-xml#entproc
2113 * i.e.
2114 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002115 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002116 */
2117void
2118xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002119 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002120 xmlEntityPtr entity = NULL;
2121 xmlParserInputPtr input;
2122
Owen Taylor3473f882001-02-23 17:55:21 +00002123 if (RAW != '%') return;
2124 switch(ctxt->instate) {
2125 case XML_PARSER_CDATA_SECTION:
2126 return;
2127 case XML_PARSER_COMMENT:
2128 return;
2129 case XML_PARSER_START_TAG:
2130 return;
2131 case XML_PARSER_END_TAG:
2132 return;
2133 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002134 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002135 return;
2136 case XML_PARSER_PROLOG:
2137 case XML_PARSER_START:
2138 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002139 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002140 return;
2141 case XML_PARSER_ENTITY_DECL:
2142 case XML_PARSER_CONTENT:
2143 case XML_PARSER_ATTRIBUTE_VALUE:
2144 case XML_PARSER_PI:
2145 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002146 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002147 /* we just ignore it there */
2148 return;
2149 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002150 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002151 return;
2152 case XML_PARSER_ENTITY_VALUE:
2153 /*
2154 * NOTE: in the case of entity values, we don't do the
2155 * substitution here since we need the literal
2156 * entity value to be able to save the internal
2157 * subset of the document.
2158 * This will be handled by xmlStringDecodeEntities
2159 */
2160 return;
2161 case XML_PARSER_DTD:
2162 /*
2163 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2164 * In the internal DTD subset, parameter-entity references
2165 * can occur only where markup declarations can occur, not
2166 * within markup declarations.
2167 * In that case this is handled in xmlParseMarkupDecl
2168 */
2169 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2170 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002171 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002172 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002173 break;
2174 case XML_PARSER_IGNORE:
2175 return;
2176 }
2177
2178 NEXT;
2179 name = xmlParseName(ctxt);
2180 if (xmlParserDebugEntities)
2181 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002182 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002183 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 } else {
2186 if (RAW == ';') {
2187 NEXT;
2188 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2189 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2190 if (entity == NULL) {
2191
2192 /*
2193 * [ WFC: Entity Declared ]
2194 * In a document without any DTD, a document with only an
2195 * internal DTD subset which contains no parameter entity
2196 * references, or a document with "standalone='yes'", ...
2197 * ... The declaration of a parameter entity must precede
2198 * any reference to it...
2199 */
2200 if ((ctxt->standalone == 1) ||
2201 ((ctxt->hasExternalSubset == 0) &&
2202 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002203 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002204 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002205 } else {
2206 /*
2207 * [ VC: Entity Declared ]
2208 * In a document with an external subset or external
2209 * parameter entities with "standalone='no'", ...
2210 * ... The declaration of a parameter entity must precede
2211 * any reference to it...
2212 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002213 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2214 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2215 "PEReference: %%%s; not found\n",
2216 name);
2217 } else
2218 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2219 "PEReference: %%%s; not found\n",
2220 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002221 ctxt->valid = 0;
2222 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002223 } else if (ctxt->input->free != deallocblankswrapper) {
2224 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2225 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002226 } else {
2227 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2228 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002229 xmlChar start[4];
2230 xmlCharEncoding enc;
2231
Owen Taylor3473f882001-02-23 17:55:21 +00002232 /*
2233 * handle the extra spaces added before and after
2234 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002235 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002236 */
2237 input = xmlNewEntityInputStream(ctxt, entity);
2238 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002239
2240 /*
2241 * Get the 4 first bytes and decode the charset
2242 * if enc != XML_CHAR_ENCODING_NONE
2243 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002244 * Note that, since we may have some non-UTF8
2245 * encoding (like UTF16, bug 135229), the 'length'
2246 * is not known, but we can calculate based upon
2247 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002248 */
2249 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002250 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002251 start[0] = RAW;
2252 start[1] = NXT(1);
2253 start[2] = NXT(2);
2254 start[3] = NXT(3);
2255 enc = xmlDetectCharEncoding(start, 4);
2256 if (enc != XML_CHAR_ENCODING_NONE) {
2257 xmlSwitchEncoding(ctxt, enc);
2258 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002259 }
2260
Owen Taylor3473f882001-02-23 17:55:21 +00002261 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002262 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2263 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002264 xmlParseTextDecl(ctxt);
2265 }
Owen Taylor3473f882001-02-23 17:55:21 +00002266 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002267 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2268 "PEReference: %s is not a parameter entity\n",
2269 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002270 }
2271 }
2272 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002273 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 }
2276}
2277
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the companion variable buffer_size and
 * reallocates buffer accordingly. It expects a mem_error label in the
 * enclosing function and jumps to it if the reallocation fails; in that
 * case buffer still designates the old block while buffer_size has
 * already been doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown_;							\
    buffer##_size = buffer##_size * 2;					\
    grown_ = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown_ == NULL)							\
	goto mem_error;							\
    buffer = grown_;							\
}
2289
2290/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002291 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002292 * @ctxt: the parser context
2293 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002294 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002295 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2296 * @end: an end marker xmlChar, 0 if none
2297 * @end2: an end marker xmlChar, 0 if none
2298 * @end3: an end marker xmlChar, 0 if none
2299 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002300 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002301 *
2302 * [67] Reference ::= EntityRef | CharRef
2303 *
2304 * [69] PEReference ::= '%' Name ';'
2305 *
2306 * Returns A newly allocated string with the substitution done. The caller
2307 * must deallocate it !
2308 */
2309xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002310xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2311 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002312 xmlChar *buffer = NULL;
2313 int buffer_size = 0;
2314
2315 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002316 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002317 xmlEntityPtr ent;
2318 int c,l;
2319 int nbchars = 0;
2320
Daniel Veillarda82b1822004-11-08 16:24:57 +00002321 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002322 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002323 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002324
2325 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002326 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002327 return(NULL);
2328 }
2329
2330 /*
2331 * allocate a translation buffer.
2332 */
2333 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002334 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002335 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002336
2337 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002338 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002339 * we are operating on already parsed values.
2340 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002341 if (str < last)
2342 c = CUR_SCHAR(str, l);
2343 else
2344 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002345 while ((c != 0) && (c != end) && /* non input consuming loop */
2346 (c != end2) && (c != end3)) {
2347
2348 if (c == 0) break;
2349 if ((c == '&') && (str[1] == '#')) {
2350 int val = xmlParseStringCharRef(ctxt, &str);
2351 if (val != 0) {
2352 COPY_BUF(0,buffer,nbchars,val);
2353 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002354 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2355 growBuffer(buffer);
2356 }
Owen Taylor3473f882001-02-23 17:55:21 +00002357 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2358 if (xmlParserDebugEntities)
2359 xmlGenericError(xmlGenericErrorContext,
2360 "String decoding Entity Reference: %.30s\n",
2361 str);
2362 ent = xmlParseStringEntityRef(ctxt, &str);
2363 if ((ent != NULL) &&
2364 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2365 if (ent->content != NULL) {
2366 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002367 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2368 growBuffer(buffer);
2369 }
Owen Taylor3473f882001-02-23 17:55:21 +00002370 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002371 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2372 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002373 }
2374 } else if ((ent != NULL) && (ent->content != NULL)) {
2375 xmlChar *rep;
2376
2377 ctxt->depth++;
2378 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2379 0, 0, 0);
2380 ctxt->depth--;
2381 if (rep != NULL) {
2382 current = rep;
2383 while (*current != 0) { /* non input consuming loop */
2384 buffer[nbchars++] = *current++;
2385 if (nbchars >
2386 buffer_size - XML_PARSER_BUFFER_SIZE) {
2387 growBuffer(buffer);
2388 }
2389 }
2390 xmlFree(rep);
2391 }
2392 } else if (ent != NULL) {
2393 int i = xmlStrlen(ent->name);
2394 const xmlChar *cur = ent->name;
2395
2396 buffer[nbchars++] = '&';
2397 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2398 growBuffer(buffer);
2399 }
2400 for (;i > 0;i--)
2401 buffer[nbchars++] = *cur++;
2402 buffer[nbchars++] = ';';
2403 }
2404 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2405 if (xmlParserDebugEntities)
2406 xmlGenericError(xmlGenericErrorContext,
2407 "String decoding PE Reference: %.30s\n", str);
2408 ent = xmlParseStringPEReference(ctxt, &str);
2409 if (ent != NULL) {
2410 xmlChar *rep;
2411
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002412 if (ent->content == NULL) {
2413 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2414 }
2415 }
Owen Taylor3473f882001-02-23 17:55:21 +00002416 ctxt->depth++;
2417 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2418 0, 0, 0);
2419 ctxt->depth--;
2420 if (rep != NULL) {
2421 current = rep;
2422 while (*current != 0) { /* non input consuming loop */
2423 buffer[nbchars++] = *current++;
2424 if (nbchars >
2425 buffer_size - XML_PARSER_BUFFER_SIZE) {
2426 growBuffer(buffer);
2427 }
2428 }
2429 xmlFree(rep);
2430 }
2431 }
2432 } else {
2433 COPY_BUF(l,buffer,nbchars,c);
2434 str += l;
2435 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2436 growBuffer(buffer);
2437 }
2438 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002439 if (str < last)
2440 c = CUR_SCHAR(str, l);
2441 else
2442 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002443 }
2444 buffer[nbchars++] = 0;
2445 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002446
2447mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002448 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002449 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002450}
2451
Daniel Veillarde57ec792003-09-10 10:50:59 +00002452/**
2453 * xmlStringDecodeEntities:
2454 * @ctxt: the parser context
2455 * @str: the input string
2456 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2457 * @end: an end marker xmlChar, 0 if none
2458 * @end2: an end marker xmlChar, 0 if none
2459 * @end3: an end marker xmlChar, 0 if none
2460 *
2461 * Takes a entity string content and process to do the adequate substitutions.
2462 *
2463 * [67] Reference ::= EntityRef | CharRef
2464 *
2465 * [69] PEReference ::= '%' Name ';'
2466 *
2467 * Returns A newly allocated string with the substitution done. The caller
2468 * must deallocate it !
2469 */
2470xmlChar *
2471xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2472 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002473 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002474 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2475 end, end2, end3));
2476}
Owen Taylor3473f882001-02-23 17:55:21 +00002477
2478/************************************************************************
2479 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002480 * Commodity functions, cleanup needed ? *
2481 * *
2482 ************************************************************************/
2483
2484/**
2485 * areBlanks:
2486 * @ctxt: an XML parser context
2487 * @str: a xmlChar *
2488 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002489 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002490 *
2491 * Is this a sequence of blank chars that one can ignore ?
2492 *
2493 * Returns 1 if ignorable 0 otherwise.
2494 */
2495
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002496static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2497 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 int i, ret;
2499 xmlNodePtr lastChild;
2500
Daniel Veillard05c13a22001-09-09 08:38:09 +00002501 /*
2502 * Don't spend time trying to differentiate them, the same callback is
2503 * used !
2504 */
2505 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002506 return(0);
2507
Owen Taylor3473f882001-02-23 17:55:21 +00002508 /*
2509 * Check for xml:space value.
2510 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002511 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2512 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002513 return(0);
2514
2515 /*
2516 * Check that the string is made of blanks
2517 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002518 if (blank_chars == 0) {
2519 for (i = 0;i < len;i++)
2520 if (!(IS_BLANK_CH(str[i]))) return(0);
2521 }
Owen Taylor3473f882001-02-23 17:55:21 +00002522
2523 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002524 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002525 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002526 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002527 if (ctxt->myDoc != NULL) {
2528 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2529 if (ret == 0) return(1);
2530 if (ret == 1) return(0);
2531 }
2532
2533 /*
2534 * Otherwise, heuristic :-\
2535 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002536 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002537 if ((ctxt->node->children == NULL) &&
2538 (RAW == '<') && (NXT(1) == '/')) return(0);
2539
2540 lastChild = xmlGetLastChild(ctxt->node);
2541 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002542 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2543 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002544 } else if (xmlNodeIsText(lastChild))
2545 return(0);
2546 else if ((ctxt->node->children != NULL) &&
2547 (xmlNodeIsText(ctxt->node->children)))
2548 return(0);
2549 return(1);
2550}
2551
Owen Taylor3473f882001-02-23 17:55:21 +00002552/************************************************************************
2553 * *
2554 * Extra stuff for namespace support *
2555 * Relates to http://www.w3.org/TR/WD-xml-names *
2556 * *
2557 ************************************************************************/
2558
2559/**
2560 * xmlSplitQName:
2561 * @ctxt: an XML parser context
2562 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002563 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002564 *
2565 * parse an UTF8 encoded XML qualified name string
2566 *
2567 * [NS 5] QName ::= (Prefix ':')? LocalPart
2568 *
2569 * [NS 6] Prefix ::= NCName
2570 *
2571 * [NS 7] LocalPart ::= NCName
2572 *
2573 * Returns the local part, and prefix is updated
2574 * to get the Prefix if any.
2575 */
2576
2577xmlChar *
2578xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2579 xmlChar buf[XML_MAX_NAMELEN + 5];
2580 xmlChar *buffer = NULL;
2581 int len = 0;
2582 int max = XML_MAX_NAMELEN;
2583 xmlChar *ret = NULL;
2584 const xmlChar *cur = name;
2585 int c;
2586
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002587 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002588 *prefix = NULL;
2589
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002590 if (cur == NULL) return(NULL);
2591
Owen Taylor3473f882001-02-23 17:55:21 +00002592#ifndef XML_XML_NAMESPACE
2593 /* xml: prefix is not really a namespace */
2594 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2595 (cur[2] == 'l') && (cur[3] == ':'))
2596 return(xmlStrdup(name));
2597#endif
2598
Daniel Veillard597bc482003-07-24 16:08:28 +00002599 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002600 if (cur[0] == ':')
2601 return(xmlStrdup(name));
2602
2603 c = *cur++;
2604 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2605 buf[len++] = c;
2606 c = *cur++;
2607 }
2608 if (len >= max) {
2609 /*
2610 * Okay someone managed to make a huge name, so he's ready to pay
2611 * for the processing speed.
2612 */
2613 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002614
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002615 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002616 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002617 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002618 return(NULL);
2619 }
2620 memcpy(buffer, buf, len);
2621 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2622 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 xmlChar *tmp;
2624
Owen Taylor3473f882001-02-23 17:55:21 +00002625 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002626 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002627 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002628 if (tmp == NULL) {
2629 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002630 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002631 return(NULL);
2632 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002633 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002634 }
2635 buffer[len++] = c;
2636 c = *cur++;
2637 }
2638 buffer[len] = 0;
2639 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002640
Daniel Veillard597bc482003-07-24 16:08:28 +00002641 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002642 if (buffer != NULL)
2643 xmlFree(buffer);
2644 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002645 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002646 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002647
Owen Taylor3473f882001-02-23 17:55:21 +00002648 if (buffer == NULL)
2649 ret = xmlStrndup(buf, len);
2650 else {
2651 ret = buffer;
2652 buffer = NULL;
2653 max = XML_MAX_NAMELEN;
2654 }
2655
2656
2657 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002658 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002659 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002660 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002661 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002662 }
Owen Taylor3473f882001-02-23 17:55:21 +00002663 len = 0;
2664
Daniel Veillardbb284f42002-10-16 18:02:47 +00002665 /*
2666 * Check that the first character is proper to start
2667 * a new name
2668 */
2669 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2670 ((c >= 0x41) && (c <= 0x5A)) ||
2671 (c == '_') || (c == ':'))) {
2672 int l;
2673 int first = CUR_SCHAR(cur, l);
2674
2675 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002676 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002677 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002678 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002679 }
2680 }
2681 cur++;
2682
Owen Taylor3473f882001-02-23 17:55:21 +00002683 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2684 buf[len++] = c;
2685 c = *cur++;
2686 }
2687 if (len >= max) {
2688 /*
2689 * Okay someone managed to make a huge name, so he's ready to pay
2690 * for the processing speed.
2691 */
2692 max = len * 2;
2693
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002694 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002695 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002696 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002697 return(NULL);
2698 }
2699 memcpy(buffer, buf, len);
2700 while (c != 0) { /* tested bigname2.xml */
2701 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002702 xmlChar *tmp;
2703
Owen Taylor3473f882001-02-23 17:55:21 +00002704 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002705 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002706 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002707 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002708 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002709 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002710 return(NULL);
2711 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002712 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 }
2714 buffer[len++] = c;
2715 c = *cur++;
2716 }
2717 buffer[len] = 0;
2718 }
2719
2720 if (buffer == NULL)
2721 ret = xmlStrndup(buf, len);
2722 else {
2723 ret = buffer;
2724 }
2725 }
2726
2727 return(ret);
2728}
2729
2730/************************************************************************
2731 * *
2732 * The parser itself *
2733 * Relates to http://www.w3.org/TR/REC-xml *
2734 * *
2735 ************************************************************************/
2736
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002737static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002738static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002739 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002740
Owen Taylor3473f882001-02-23 17:55:21 +00002741/**
2742 * xmlParseName:
2743 * @ctxt: an XML parser context
2744 *
2745 * parse an XML name.
2746 *
2747 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2748 * CombiningChar | Extender
2749 *
2750 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2751 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002752 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002753 *
2754 * Returns the Name parsed or NULL
2755 */
2756
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002757const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002758xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002759 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002760 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002761 int count = 0;
2762
2763 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 /*
2766 * Accelerator for simple ASCII names
2767 */
2768 in = ctxt->input->cur;
2769 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2770 ((*in >= 0x41) && (*in <= 0x5A)) ||
2771 (*in == '_') || (*in == ':')) {
2772 in++;
2773 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2774 ((*in >= 0x41) && (*in <= 0x5A)) ||
2775 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002776 (*in == '_') || (*in == '-') ||
2777 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002778 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002779 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002780 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002781 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002782 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002783 ctxt->nbChars += count;
2784 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002785 if (ret == NULL)
2786 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002787 return(ret);
2788 }
2789 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002790 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002791}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002792
Daniel Veillard46de64e2002-05-29 08:21:33 +00002793/**
2794 * xmlParseNameAndCompare:
2795 * @ctxt: an XML parser context
2796 *
2797 * parse an XML name and compares for match
2798 * (specialized for endtag parsing)
2799 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002800 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2801 * and the name for mismatch
2802 */
2803
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002804static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002805xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002806 register const xmlChar *cmp = other;
2807 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002808 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002809
2810 GROW;
2811
2812 in = ctxt->input->cur;
2813 while (*in != 0 && *in == *cmp) {
2814 ++in;
2815 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002816 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002817 }
William M. Brack76e95df2003-10-18 16:20:14 +00002818 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002819 /* success */
2820 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002821 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002822 }
2823 /* failure (or end of input buffer), check with full function */
2824 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002825 /* strings coming from the dictionnary direct compare possible */
2826 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002827 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002828 }
2829 return ret;
2830}
2831
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002832static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002833xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002834 int len = 0, l;
2835 int c;
2836 int count = 0;
2837
2838 /*
2839 * Handler for more complex cases
2840 */
2841 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002842 c = CUR_CHAR(l);
2843 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2844 (!IS_LETTER(c) && (c != '_') &&
2845 (c != ':'))) {
2846 return(NULL);
2847 }
2848
2849 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002850 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002851 (c == '.') || (c == '-') ||
2852 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002853 (IS_COMBINING(c)) ||
2854 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002855 if (count++ > 100) {
2856 count = 0;
2857 GROW;
2858 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002859 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002860 NEXTL(l);
2861 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002862 }
Daniel Veillard96688262005-08-23 18:14:12 +00002863 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2864 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002865 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002866}
2867
2868/**
2869 * xmlParseStringName:
2870 * @ctxt: an XML parser context
2871 * @str: a pointer to the string pointer (IN/OUT)
2872 *
2873 * parse an XML name.
2874 *
2875 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2876 * CombiningChar | Extender
2877 *
2878 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2879 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002880 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002881 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002882 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002883 * is updated to the current location in the string.
2884 */
2885
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002886static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002887xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2888 xmlChar buf[XML_MAX_NAMELEN + 5];
2889 const xmlChar *cur = *str;
2890 int len = 0, l;
2891 int c;
2892
2893 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002894 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002895 (c != ':')) {
2896 return(NULL);
2897 }
2898
William M. Brack871611b2003-10-18 04:53:14 +00002899 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002900 (c == '.') || (c == '-') ||
2901 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002902 (IS_COMBINING(c)) ||
2903 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002904 COPY_BUF(l,buf,len,c);
2905 cur += l;
2906 c = CUR_SCHAR(cur, l);
2907 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2908 /*
2909 * Okay someone managed to make a huge name, so he's ready to pay
2910 * for the processing speed.
2911 */
2912 xmlChar *buffer;
2913 int max = len * 2;
2914
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002915 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002916 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
2920 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002921 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002922 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002923 (c == '.') || (c == '-') ||
2924 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002925 (IS_COMBINING(c)) ||
2926 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002927 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002928 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002929 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002930 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002931 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002932 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002933 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002934 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002935 return(NULL);
2936 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002937 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002938 }
2939 COPY_BUF(l,buffer,len,c);
2940 cur += l;
2941 c = CUR_SCHAR(cur, l);
2942 }
2943 buffer[len] = 0;
2944 *str = cur;
2945 return(buffer);
2946 }
2947 }
2948 *str = cur;
2949 return(xmlStrndup(buf, len));
2950}
2951
2952/**
2953 * xmlParseNmtoken:
2954 * @ctxt: an XML parser context
2955 *
2956 * parse an XML Nmtoken.
2957 *
2958 * [7] Nmtoken ::= (NameChar)+
2959 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002960 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002961 *
2962 * Returns the Nmtoken parsed or NULL
2963 */
2964
2965xmlChar *
2966xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2967 xmlChar buf[XML_MAX_NAMELEN + 5];
2968 int len = 0, l;
2969 int c;
2970 int count = 0;
2971
2972 GROW;
2973 c = CUR_CHAR(l);
2974
William M. Brack871611b2003-10-18 04:53:14 +00002975 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002976 (c == '.') || (c == '-') ||
2977 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002978 (IS_COMBINING(c)) ||
2979 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002980 if (count++ > 100) {
2981 count = 0;
2982 GROW;
2983 }
2984 COPY_BUF(l,buf,len,c);
2985 NEXTL(l);
2986 c = CUR_CHAR(l);
2987 if (len >= XML_MAX_NAMELEN) {
2988 /*
2989 * Okay someone managed to make a huge token, so he's ready to pay
2990 * for the processing speed.
2991 */
2992 xmlChar *buffer;
2993 int max = len * 2;
2994
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002995 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002996 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002997 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002998 return(NULL);
2999 }
3000 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003001 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003006 if (count++ > 100) {
3007 count = 0;
3008 GROW;
3009 }
3010 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003011 xmlChar *tmp;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003015 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003017 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003018 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 return(NULL);
3020 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003022 }
3023 COPY_BUF(l,buffer,len,c);
3024 NEXTL(l);
3025 c = CUR_CHAR(l);
3026 }
3027 buffer[len] = 0;
3028 return(buffer);
3029 }
3030 }
3031 if (len == 0)
3032 return(NULL);
3033 return(xmlStrndup(buf, len));
3034}
3035
3036/**
3037 * xmlParseEntityValue:
3038 * @ctxt: an XML parser context
3039 * @orig: if non-NULL store a copy of the original entity value
3040 *
3041 * parse a value for ENTITY declarations
3042 *
3043 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3044 * "'" ([^%&'] | PEReference | Reference)* "'"
3045 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003046 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003047 */
3048
3049xmlChar *
3050xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3051 xmlChar *buf = NULL;
3052 int len = 0;
3053 int size = XML_PARSER_BUFFER_SIZE;
3054 int c, l;
3055 xmlChar stop;
3056 xmlChar *ret = NULL;
3057 const xmlChar *cur = NULL;
3058 xmlParserInputPtr input;
3059
3060 if (RAW == '"') stop = '"';
3061 else if (RAW == '\'') stop = '\'';
3062 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003063 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003064 return(NULL);
3065 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003066 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003067 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003068 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003069 return(NULL);
3070 }
3071
3072 /*
3073 * The content of the entity definition is copied in a buffer.
3074 */
3075
3076 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3077 input = ctxt->input;
3078 GROW;
3079 NEXT;
3080 c = CUR_CHAR(l);
3081 /*
3082 * NOTE: 4.4.5 Included in Literal
3083 * When a parameter entity reference appears in a literal entity
3084 * value, ... a single or double quote character in the replacement
3085 * text is always treated as a normal data character and will not
3086 * terminate the literal.
3087 * In practice it means we stop the loop only when back at parsing
3088 * the initial entity and the quote is found
3089 */
William M. Brack871611b2003-10-18 04:53:14 +00003090 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003091 (ctxt->input != input))) {
3092 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003093 xmlChar *tmp;
3094
Owen Taylor3473f882001-02-23 17:55:21 +00003095 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003096 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3097 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003098 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003099 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 return(NULL);
3101 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003102 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 COPY_BUF(l,buf,len,c);
3105 NEXTL(l);
3106 /*
3107 * Pop-up of finished entities.
3108 */
3109 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3110 xmlPopInput(ctxt);
3111
3112 GROW;
3113 c = CUR_CHAR(l);
3114 if (c == 0) {
3115 GROW;
3116 c = CUR_CHAR(l);
3117 }
3118 }
3119 buf[len] = 0;
3120
3121 /*
3122 * Raise problem w.r.t. '&' and '%' being used in non-entities
3123 * reference constructs. Note Charref will be handled in
3124 * xmlStringDecodeEntities()
3125 */
3126 cur = buf;
3127 while (*cur != 0) { /* non input consuming */
3128 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3129 xmlChar *name;
3130 xmlChar tmp = *cur;
3131
3132 cur++;
3133 name = xmlParseStringName(ctxt, &cur);
3134 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003135 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003136 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003137 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003138 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003139 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3140 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003141 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003142 }
3143 if (name != NULL)
3144 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003145 if (*cur == 0)
3146 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003147 }
3148 cur++;
3149 }
3150
3151 /*
3152 * Then PEReference entities are substituted.
3153 */
3154 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003155 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003156 xmlFree(buf);
3157 } else {
3158 NEXT;
3159 /*
3160 * NOTE: 4.4.7 Bypassed
3161 * When a general entity reference appears in the EntityValue in
3162 * an entity declaration, it is bypassed and left as is.
3163 * so XML_SUBSTITUTE_REF is not set here.
3164 */
3165 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3166 0, 0, 0);
3167 if (orig != NULL)
3168 *orig = buf;
3169 else
3170 xmlFree(buf);
3171 }
3172
3173 return(ret);
3174}
3175
3176/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003177 * xmlParseAttValueComplex:
3178 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003179 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003180 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003181 *
3182 * parse a value for an attribute, this is the fallback function
3183 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003184 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003185 *
3186 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3187 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003188static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003189xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003190 xmlChar limit = 0;
3191 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003192 int len = 0;
3193 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003194 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003195 xmlChar *current = NULL;
3196 xmlEntityPtr ent;
3197
Owen Taylor3473f882001-02-23 17:55:21 +00003198 if (NXT(0) == '"') {
3199 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3200 limit = '"';
3201 NEXT;
3202 } else if (NXT(0) == '\'') {
3203 limit = '\'';
3204 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3205 NEXT;
3206 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003207 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003208 return(NULL);
3209 }
3210
3211 /*
3212 * allocate a translation buffer.
3213 */
3214 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003215 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003216 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003217
3218 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003219 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003220 */
3221 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003222 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003223 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003224 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003225 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003226 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003227 if (NXT(1) == '#') {
3228 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003229
Owen Taylor3473f882001-02-23 17:55:21 +00003230 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003231 if (ctxt->replaceEntities) {
3232 if (len > buf_size - 10) {
3233 growBuffer(buf);
3234 }
3235 buf[len++] = '&';
3236 } else {
3237 /*
3238 * The reparsing will be done in xmlStringGetNodeList()
3239 * called by the attribute() function in SAX.c
3240 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003241 if (len > buf_size - 10) {
3242 growBuffer(buf);
3243 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003244 buf[len++] = '&';
3245 buf[len++] = '#';
3246 buf[len++] = '3';
3247 buf[len++] = '8';
3248 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003249 }
3250 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003251 if (len > buf_size - 10) {
3252 growBuffer(buf);
3253 }
Owen Taylor3473f882001-02-23 17:55:21 +00003254 len += xmlCopyChar(0, &buf[len], val);
3255 }
3256 } else {
3257 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003258 if ((ent != NULL) &&
3259 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3260 if (len > buf_size - 10) {
3261 growBuffer(buf);
3262 }
3263 if ((ctxt->replaceEntities == 0) &&
3264 (ent->content[0] == '&')) {
3265 buf[len++] = '&';
3266 buf[len++] = '#';
3267 buf[len++] = '3';
3268 buf[len++] = '8';
3269 buf[len++] = ';';
3270 } else {
3271 buf[len++] = ent->content[0];
3272 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003273 } else if ((ent != NULL) &&
3274 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003275 xmlChar *rep;
3276
3277 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3278 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003279 XML_SUBSTITUTE_REF,
3280 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003281 if (rep != NULL) {
3282 current = rep;
3283 while (*current != 0) { /* non input consuming */
3284 buf[len++] = *current++;
3285 if (len > buf_size - 10) {
3286 growBuffer(buf);
3287 }
3288 }
3289 xmlFree(rep);
3290 }
3291 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003292 if (len > buf_size - 10) {
3293 growBuffer(buf);
3294 }
Owen Taylor3473f882001-02-23 17:55:21 +00003295 if (ent->content != NULL)
3296 buf[len++] = ent->content[0];
3297 }
3298 } else if (ent != NULL) {
3299 int i = xmlStrlen(ent->name);
3300 const xmlChar *cur = ent->name;
3301
3302 /*
3303 * This may look absurd but is needed to detect
3304 * entities problems
3305 */
3306 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3307 (ent->content != NULL)) {
3308 xmlChar *rep;
3309 rep = xmlStringDecodeEntities(ctxt, ent->content,
3310 XML_SUBSTITUTE_REF, 0, 0, 0);
3311 if (rep != NULL)
3312 xmlFree(rep);
3313 }
3314
3315 /*
3316 * Just output the reference
3317 */
3318 buf[len++] = '&';
3319 if (len > buf_size - i - 10) {
3320 growBuffer(buf);
3321 }
3322 for (;i > 0;i--)
3323 buf[len++] = *cur++;
3324 buf[len++] = ';';
3325 }
3326 }
3327 } else {
3328 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003329 if ((len != 0) || (!normalize)) {
3330 if ((!normalize) || (!in_space)) {
3331 COPY_BUF(l,buf,len,0x20);
3332 if (len > buf_size - 10) {
3333 growBuffer(buf);
3334 }
3335 }
3336 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003337 }
3338 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003339 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003340 COPY_BUF(l,buf,len,c);
3341 if (len > buf_size - 10) {
3342 growBuffer(buf);
3343 }
3344 }
3345 NEXTL(l);
3346 }
3347 GROW;
3348 c = CUR_CHAR(l);
3349 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003350 if ((in_space) && (normalize)) {
3351 while (buf[len - 1] == 0x20) len--;
3352 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003353 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003354 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003355 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003356 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003357 if ((c != 0) && (!IS_CHAR(c))) {
3358 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3359 "invalid character in attribute value\n");
3360 } else {
3361 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3362 "AttValue: ' expected\n");
3363 }
Owen Taylor3473f882001-02-23 17:55:21 +00003364 } else
3365 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003366 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003367 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003368
3369mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003370 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003371 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003372}
3373
3374/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003375 * xmlParseAttValue:
3376 * @ctxt: an XML parser context
3377 *
3378 * parse a value for an attribute
3379 * Note: the parser won't do substitution of entities here, this
3380 * will be handled later in xmlStringGetNodeList
3381 *
3382 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3383 * "'" ([^<&'] | Reference)* "'"
3384 *
3385 * 3.3.3 Attribute-Value Normalization:
3386 * Before the value of an attribute is passed to the application or
3387 * checked for validity, the XML processor must normalize it as follows:
3388 * - a character reference is processed by appending the referenced
3389 * character to the attribute value
3390 * - an entity reference is processed by recursively processing the
3391 * replacement text of the entity
3392 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3393 * appending #x20 to the normalized value, except that only a single
3394 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3395 * parsed entity or the literal entity value of an internal parsed entity
3396 * - other characters are processed by appending them to the normalized value
3397 * If the declared value is not CDATA, then the XML processor must further
3398 * process the normalized attribute value by discarding any leading and
3399 * trailing space (#x20) characters, and by replacing sequences of space
3400 * (#x20) characters by a single space (#x20) character.
3401 * All attributes for which no declaration has been read should be treated
3402 * by a non-validating parser as if declared CDATA.
3403 *
3404 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3405 */
3406
3407
3408xmlChar *
3409xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003410 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003411 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003412}
3413
3414/**
Owen Taylor3473f882001-02-23 17:55:21 +00003415 * xmlParseSystemLiteral:
3416 * @ctxt: an XML parser context
3417 *
3418 * parse an XML Literal
3419 *
3420 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3421 *
3422 * Returns the SystemLiteral parsed or NULL
3423 */
3424
3425xmlChar *
3426xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3427 xmlChar *buf = NULL;
3428 int len = 0;
3429 int size = XML_PARSER_BUFFER_SIZE;
3430 int cur, l;
3431 xmlChar stop;
3432 int state = ctxt->instate;
3433 int count = 0;
3434
3435 SHRINK;
3436 if (RAW == '"') {
3437 NEXT;
3438 stop = '"';
3439 } else if (RAW == '\'') {
3440 NEXT;
3441 stop = '\'';
3442 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003443 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003444 return(NULL);
3445 }
3446
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003447 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003448 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003449 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003450 return(NULL);
3451 }
3452 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3453 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003454 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003455 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003456 xmlChar *tmp;
3457
Owen Taylor3473f882001-02-23 17:55:21 +00003458 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003459 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3460 if (tmp == NULL) {
3461 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003462 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003463 ctxt->instate = (xmlParserInputState) state;
3464 return(NULL);
3465 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003466 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003467 }
3468 count++;
3469 if (count > 50) {
3470 GROW;
3471 count = 0;
3472 }
3473 COPY_BUF(l,buf,len,cur);
3474 NEXTL(l);
3475 cur = CUR_CHAR(l);
3476 if (cur == 0) {
3477 GROW;
3478 SHRINK;
3479 cur = CUR_CHAR(l);
3480 }
3481 }
3482 buf[len] = 0;
3483 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003484 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 } else {
3487 NEXT;
3488 }
3489 return(buf);
3490}
3491
3492/**
3493 * xmlParsePubidLiteral:
3494 * @ctxt: an XML parser context
3495 *
3496 * parse an XML public literal
3497 *
3498 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3499 *
3500 * Returns the PubidLiteral parsed or NULL.
3501 */
3502
3503xmlChar *
3504xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3505 xmlChar *buf = NULL;
3506 int len = 0;
3507 int size = XML_PARSER_BUFFER_SIZE;
3508 xmlChar cur;
3509 xmlChar stop;
3510 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003511 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003512
3513 SHRINK;
3514 if (RAW == '"') {
3515 NEXT;
3516 stop = '"';
3517 } else if (RAW == '\'') {
3518 NEXT;
3519 stop = '\'';
3520 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003521 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003524 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003525 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003526 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003527 return(NULL);
3528 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003529 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003530 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003531 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003532 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003533 xmlChar *tmp;
3534
Owen Taylor3473f882001-02-23 17:55:21 +00003535 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003536 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3537 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003538 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003539 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003540 return(NULL);
3541 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003542 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003543 }
3544 buf[len++] = cur;
3545 count++;
3546 if (count > 50) {
3547 GROW;
3548 count = 0;
3549 }
3550 NEXT;
3551 cur = CUR;
3552 if (cur == 0) {
3553 GROW;
3554 SHRINK;
3555 cur = CUR;
3556 }
3557 }
3558 buf[len] = 0;
3559 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003561 } else {
3562 NEXT;
3563 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003564 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003565 return(buf);
3566}
3567
Daniel Veillard48b2f892001-02-25 16:11:03 +00003568void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003569
/*
 * Lookup table used by the test in the inner loop of the char data
 * parsing. An entry is non-zero (and equal to its own index) for the
 * bytes that loop may step over directly: 0x9 and 0x20..0x7F except
 * '&', '<' and ']'. Every other byte, including CR and LF which are
 * handled separately and all non-ASCII bytes, maps to 0 and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB (0x9) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3607
Owen Taylor3473f882001-02-23 17:55:21 +00003608/**
3609 * xmlParseCharData:
3610 * @ctxt: an XML parser context
3611 * @cdata: int indicating whether we are within a CDATA section
3612 *
3613 * parse a CharData section.
3614 * if we are within a CDATA section ']]>' marks an end of section.
3615 *
3616 * The right angle bracket (>) may be represented using the string "&gt;",
3617 * and must, for compatibility, be escaped using "&gt;" or a character
3618 * reference when it appears in the string "]]>" in content, when that
3619 * string is not marking the end of a CDATA section.
3620 *
3621 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3622 */
3623
3624void
3625xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003626 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003627 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003628 int line = ctxt->input->line;
3629 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003630 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003631
3632 SHRINK;
3633 GROW;
3634 /*
3635 * Accelerated common case where input don't need to be
3636 * modified before passing it to the handler.
3637 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003638 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003639 in = ctxt->input->cur;
3640 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003641get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003642 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003643 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003644 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003645 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003646 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003647 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003648 goto get_more_space;
3649 }
3650 if (*in == '<') {
3651 nbchar = in - ctxt->input->cur;
3652 if (nbchar > 0) {
3653 const xmlChar *tmp = ctxt->input->cur;
3654 ctxt->input->cur = in;
3655
Daniel Veillard34099b42004-11-04 17:34:35 +00003656 if ((ctxt->sax != NULL) &&
3657 (ctxt->sax->ignorableWhitespace !=
3658 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003659 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003660 if (ctxt->sax->ignorableWhitespace != NULL)
3661 ctxt->sax->ignorableWhitespace(ctxt->userData,
3662 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003663 } else {
3664 if (ctxt->sax->characters != NULL)
3665 ctxt->sax->characters(ctxt->userData,
3666 tmp, nbchar);
3667 if (*ctxt->space == -1)
3668 *ctxt->space = -2;
3669 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003670 } else if ((ctxt->sax != NULL) &&
3671 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003672 ctxt->sax->characters(ctxt->userData,
3673 tmp, nbchar);
3674 }
3675 }
3676 return;
3677 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003678
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003679get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003680 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003681 while (test_char_data[*in]) {
3682 in++;
3683 ccol++;
3684 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003685 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003686 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003687 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003688 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003689 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003690 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003691 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003692 }
3693 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003694 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003695 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003696 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003697 return;
3698 }
3699 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003700 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003701 goto get_more;
3702 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003703 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003704 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003705 if ((ctxt->sax != NULL) &&
3706 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003707 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003708 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003709 const xmlChar *tmp = ctxt->input->cur;
3710 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003711
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003712 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003713 if (ctxt->sax->ignorableWhitespace != NULL)
3714 ctxt->sax->ignorableWhitespace(ctxt->userData,
3715 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003716 } else {
3717 if (ctxt->sax->characters != NULL)
3718 ctxt->sax->characters(ctxt->userData,
3719 tmp, nbchar);
3720 if (*ctxt->space == -1)
3721 *ctxt->space = -2;
3722 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003723 line = ctxt->input->line;
3724 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003725 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003726 if (ctxt->sax->characters != NULL)
3727 ctxt->sax->characters(ctxt->userData,
3728 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003729 line = ctxt->input->line;
3730 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003731 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003732 }
3733 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003734 if (*in == 0xD) {
3735 in++;
3736 if (*in == 0xA) {
3737 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003738 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003739 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003740 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003741 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003742 in--;
3743 }
3744 if (*in == '<') {
3745 return;
3746 }
3747 if (*in == '&') {
3748 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003749 }
3750 SHRINK;
3751 GROW;
3752 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003753 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003754 nbchar = 0;
3755 }
Daniel Veillard50582112001-03-26 22:52:16 +00003756 ctxt->input->line = line;
3757 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003758 xmlParseCharDataComplex(ctxt, cdata);
3759}
3760
Daniel Veillard01c13b52002-12-10 15:19:08 +00003761/**
3762 * xmlParseCharDataComplex:
3763 * @ctxt: an XML parser context
3764 * @cdata: int indicating whether we are within a CDATA section
3765 *
3766 * parse a CharData section.this is the fallback function
3767 * of xmlParseCharData() when the parsing requires handling
3768 * of non-ASCII characters.
3769 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003770void
3771xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003772 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3773 int nbchar = 0;
3774 int cur, l;
3775 int count = 0;
3776
3777 SHRINK;
3778 GROW;
3779 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003780 while ((cur != '<') && /* checked */
3781 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003782 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003783 if ((cur == ']') && (NXT(1) == ']') &&
3784 (NXT(2) == '>')) {
3785 if (cdata) break;
3786 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003787 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003788 }
3789 }
3790 COPY_BUF(l,buf,nbchar,cur);
3791 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003792 buf[nbchar] = 0;
3793
Owen Taylor3473f882001-02-23 17:55:21 +00003794 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003795 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003796 */
3797 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003798 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003799 if (ctxt->sax->ignorableWhitespace != NULL)
3800 ctxt->sax->ignorableWhitespace(ctxt->userData,
3801 buf, nbchar);
3802 } else {
3803 if (ctxt->sax->characters != NULL)
3804 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003805 if ((ctxt->sax->characters !=
3806 ctxt->sax->ignorableWhitespace) &&
3807 (*ctxt->space == -1))
3808 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003809 }
3810 }
3811 nbchar = 0;
3812 }
3813 count++;
3814 if (count > 50) {
3815 GROW;
3816 count = 0;
3817 }
3818 NEXTL(l);
3819 cur = CUR_CHAR(l);
3820 }
3821 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003822 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003824 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003825 */
3826 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003827 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003828 if (ctxt->sax->ignorableWhitespace != NULL)
3829 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3830 } else {
3831 if (ctxt->sax->characters != NULL)
3832 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003833 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3834 (*ctxt->space == -1))
3835 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003836 }
3837 }
3838 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003839 if ((cur != 0) && (!IS_CHAR(cur))) {
3840 /* Generate the error and skip the offending character */
3841 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3842 "PCDATA invalid Char value %d\n",
3843 cur);
3844 NEXTL(l);
3845 }
Owen Taylor3473f882001-02-23 17:55:21 +00003846}
3847
3848/**
3849 * xmlParseExternalID:
3850 * @ctxt: an XML parser context
3851 * @publicID: a xmlChar** receiving PubidLiteral
3852 * @strict: indicate whether we should restrict parsing to only
3853 * production [75], see NOTE below
3854 *
3855 * Parse an External ID or a Public ID
3856 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003857 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003858 * 'PUBLIC' S PubidLiteral S SystemLiteral
3859 *
3860 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3861 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3862 *
3863 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3864 *
3865 * Returns the SystemLiteral; in the second case publicID also
3866 * receives the PubidLiteral. If strict is off
3867 * it is possible to return NULL and have publicID set.
3868 */
3869
3870xmlChar *
3871xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3872 xmlChar *URI = NULL;
3873
3874 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003875
3876 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003877 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003878 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003879 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003880 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3881 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003882 }
3883 SKIP_BLANKS;
3884 URI = xmlParseSystemLiteral(ctxt);
3885 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003886 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003888 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003889 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003890 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003891 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003892 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003893 }
3894 SKIP_BLANKS;
3895 *publicID = xmlParsePubidLiteral(ctxt);
3896 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003897 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003898 }
3899 if (strict) {
3900 /*
3901 * We don't handle [83] so "S SystemLiteral" is required.
3902 */
William M. Brack76e95df2003-10-18 16:20:14 +00003903 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003904 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003905 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003906 }
3907 } else {
3908 /*
3909 * We handle [83] so we return immediately, if
3910 * "S SystemLiteral" is not detected. From a purely parsing
3911 * point of view that's a nice mess.
3912 */
3913 const xmlChar *ptr;
3914 GROW;
3915
3916 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003917 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003918
William M. Brack76e95df2003-10-18 16:20:14 +00003919 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003920 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3921 }
3922 SKIP_BLANKS;
3923 URI = xmlParseSystemLiteral(ctxt);
3924 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003925 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003926 }
3927 }
3928 return(URI);
3929}
3930
3931/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003932 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003933 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003934 * @buf: the already parsed part of the buffer
3935 * @len: number of bytes filles in the buffer
3936 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003937 *
3938 * Skip an XML (SGML) comment <!-- .... -->
3939 * The spec says that "For compatibility, the string "--" (double-hyphen)
3940 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003941 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003942 *
3943 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3944 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003945static void
3946xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003947 int q, ql;
3948 int r, rl;
3949 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 xmlParserInputPtr input = ctxt->input;
3951 int count = 0;
3952
Owen Taylor3473f882001-02-23 17:55:21 +00003953 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003954 len = 0;
3955 size = XML_PARSER_BUFFER_SIZE;
3956 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3957 if (buf == NULL) {
3958 xmlErrMemory(ctxt, NULL);
3959 return;
3960 }
Owen Taylor3473f882001-02-23 17:55:21 +00003961 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003962 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003963 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003964 if (q == 0)
3965 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003966 if (!IS_CHAR(q)) {
3967 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3968 "xmlParseComment: invalid xmlChar value %d\n",
3969 q);
3970 xmlFree (buf);
3971 return;
3972 }
Owen Taylor3473f882001-02-23 17:55:21 +00003973 NEXTL(ql);
3974 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003975 if (r == 0)
3976 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003977 if (!IS_CHAR(r)) {
3978 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3979 "xmlParseComment: invalid xmlChar value %d\n",
3980 q);
3981 xmlFree (buf);
3982 return;
3983 }
Owen Taylor3473f882001-02-23 17:55:21 +00003984 NEXTL(rl);
3985 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003986 if (cur == 0)
3987 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003988 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003989 ((cur != '>') ||
3990 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003991 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003992 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003993 }
3994 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003995 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003996 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003997 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3998 if (new_buf == NULL) {
3999 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004000 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004001 return;
4002 }
William M. Bracka3215c72004-07-31 16:24:01 +00004003 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004004 }
4005 COPY_BUF(ql,buf,len,q);
4006 q = r;
4007 ql = rl;
4008 r = cur;
4009 rl = l;
4010
4011 count++;
4012 if (count > 50) {
4013 GROW;
4014 count = 0;
4015 }
4016 NEXTL(l);
4017 cur = CUR_CHAR(l);
4018 if (cur == 0) {
4019 SHRINK;
4020 GROW;
4021 cur = CUR_CHAR(l);
4022 }
4023 }
4024 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004025 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004026 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004027 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004028 } else if (!IS_CHAR(cur)) {
4029 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4030 "xmlParseComment: invalid xmlChar value %d\n",
4031 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004032 } else {
4033 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004034 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4035 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004036 }
4037 NEXT;
4038 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4039 (!ctxt->disableSAX))
4040 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 }
Daniel Veillardda629342007-08-01 07:49:06 +00004042 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004043 return;
4044not_terminated:
4045 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4046 "Comment not terminated\n", NULL);
4047 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004048 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004049}
Daniel Veillardda629342007-08-01 07:49:06 +00004050
Daniel Veillard4c778d82005-01-23 17:37:44 +00004051/**
4052 * xmlParseComment:
4053 * @ctxt: an XML parser context
4054 *
4055 * Skip an XML (SGML) comment <!-- .... -->
4056 * The spec says that "For compatibility, the string "--" (double-hyphen)
4057 * must not occur within comments. "
4058 *
4059 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4060 */
4061void
4062xmlParseComment(xmlParserCtxtPtr ctxt) {
4063 xmlChar *buf = NULL;
4064 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004065 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004066 xmlParserInputState state;
4067 const xmlChar *in;
4068 int nbchar = 0, ccol;
4069
4070 /*
4071 * Check that there is a comment right here.
4072 */
4073 if ((RAW != '<') || (NXT(1) != '!') ||
4074 (NXT(2) != '-') || (NXT(3) != '-')) return;
4075
4076 state = ctxt->instate;
4077 ctxt->instate = XML_PARSER_COMMENT;
4078 SKIP(4);
4079 SHRINK;
4080 GROW;
4081
4082 /*
4083 * Accelerated common case where input don't need to be
4084 * modified before passing it to the handler.
4085 */
4086 in = ctxt->input->cur;
4087 do {
4088 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004089 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004090 ctxt->input->line++; ctxt->input->col = 1;
4091 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004092 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004093 }
4094get_more:
4095 ccol = ctxt->input->col;
4096 while (((*in > '-') && (*in <= 0x7F)) ||
4097 ((*in >= 0x20) && (*in < '-')) ||
4098 (*in == 0x09)) {
4099 in++;
4100 ccol++;
4101 }
4102 ctxt->input->col = ccol;
4103 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004104 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004105 ctxt->input->line++; ctxt->input->col = 1;
4106 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004107 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004108 goto get_more;
4109 }
4110 nbchar = in - ctxt->input->cur;
4111 /*
4112 * save current set of data
4113 */
4114 if (nbchar > 0) {
4115 if ((ctxt->sax != NULL) &&
4116 (ctxt->sax->comment != NULL)) {
4117 if (buf == NULL) {
4118 if ((*in == '-') && (in[1] == '-'))
4119 size = nbchar + 1;
4120 else
4121 size = XML_PARSER_BUFFER_SIZE + nbchar;
4122 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4123 if (buf == NULL) {
4124 xmlErrMemory(ctxt, NULL);
4125 ctxt->instate = state;
4126 return;
4127 }
4128 len = 0;
4129 } else if (len + nbchar + 1 >= size) {
4130 xmlChar *new_buf;
4131 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4132 new_buf = (xmlChar *) xmlRealloc(buf,
4133 size * sizeof(xmlChar));
4134 if (new_buf == NULL) {
4135 xmlFree (buf);
4136 xmlErrMemory(ctxt, NULL);
4137 ctxt->instate = state;
4138 return;
4139 }
4140 buf = new_buf;
4141 }
4142 memcpy(&buf[len], ctxt->input->cur, nbchar);
4143 len += nbchar;
4144 buf[len] = 0;
4145 }
4146 }
4147 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004148 if (*in == 0xA) {
4149 in++;
4150 ctxt->input->line++; ctxt->input->col = 1;
4151 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004152 if (*in == 0xD) {
4153 in++;
4154 if (*in == 0xA) {
4155 ctxt->input->cur = in;
4156 in++;
4157 ctxt->input->line++; ctxt->input->col = 1;
4158 continue; /* while */
4159 }
4160 in--;
4161 }
4162 SHRINK;
4163 GROW;
4164 in = ctxt->input->cur;
4165 if (*in == '-') {
4166 if (in[1] == '-') {
4167 if (in[2] == '>') {
4168 SKIP(3);
4169 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4170 (!ctxt->disableSAX)) {
4171 if (buf != NULL)
4172 ctxt->sax->comment(ctxt->userData, buf);
4173 else
4174 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4175 }
4176 if (buf != NULL)
4177 xmlFree(buf);
4178 ctxt->instate = state;
4179 return;
4180 }
4181 if (buf != NULL)
4182 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4183 "Comment not terminated \n<!--%.50s\n",
4184 buf);
4185 else
4186 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4187 "Comment not terminated \n", NULL);
4188 in++;
4189 ctxt->input->col++;
4190 }
4191 in++;
4192 ctxt->input->col++;
4193 goto get_more;
4194 }
4195 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4196 xmlParseCommentComplex(ctxt, buf, len, size);
4197 ctxt->instate = state;
4198 return;
4199}
4200
Owen Taylor3473f882001-02-23 17:55:21 +00004201
4202/**
4203 * xmlParsePITarget:
4204 * @ctxt: an XML parser context
4205 *
4206 * parse the name of a PI
4207 *
4208 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4209 *
4210 * Returns the PITarget name or NULL
4211 */
4212
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004213const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004214xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004215 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004216
4217 name = xmlParseName(ctxt);
4218 if ((name != NULL) &&
4219 ((name[0] == 'x') || (name[0] == 'X')) &&
4220 ((name[1] == 'm') || (name[1] == 'M')) &&
4221 ((name[2] == 'l') || (name[2] == 'L'))) {
4222 int i;
4223 if ((name[0] == 'x') && (name[1] == 'm') &&
4224 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004225 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004226 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004227 return(name);
4228 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004229 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004230 return(name);
4231 }
4232 for (i = 0;;i++) {
4233 if (xmlW3CPIs[i] == NULL) break;
4234 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4235 return(name);
4236 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004237 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4238 "xmlParsePITarget: invalid name prefix 'xml'\n",
4239 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 }
4241 return(name);
4242}
4243
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: catalog, optional blanks, '=', optional
 * blanks, a quoted URL, optional blanks. A missing '=' makes the
 * function return silently; any other deviation emits an
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0) {
        goto error;
    }
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        /* No value assigned: ignored without a warning. */
        return;
    }
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"'))) {
        goto error;
    }
    p++;

    /* Locate the matching closing quote. */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0) {
        goto error;
    }
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the quoted value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0) {
        goto error;
    }

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL) {
        xmlFree(url);
    }
}
#endif
4305
Owen Taylor3473f882001-02-23 17:55:21 +00004306/**
4307 * xmlParsePI:
4308 * @ctxt: an XML parser context
4309 *
4310 * parse an XML Processing Instruction.
4311 *
4312 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4313 *
4314 * The processing is transfered to SAX once parsed.
4315 */
4316
4317void
4318xmlParsePI(xmlParserCtxtPtr ctxt) {
4319 xmlChar *buf = NULL;
4320 int len = 0;
4321 int size = XML_PARSER_BUFFER_SIZE;
4322 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004323 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004324 xmlParserInputState state;
4325 int count = 0;
4326
4327 if ((RAW == '<') && (NXT(1) == '?')) {
4328 xmlParserInputPtr input = ctxt->input;
4329 state = ctxt->instate;
4330 ctxt->instate = XML_PARSER_PI;
4331 /*
4332 * this is a Processing Instruction.
4333 */
4334 SKIP(2);
4335 SHRINK;
4336
4337 /*
4338 * Parse the target name and check for special support like
4339 * namespace.
4340 */
4341 target = xmlParsePITarget(ctxt);
4342 if (target != NULL) {
4343 if ((RAW == '?') && (NXT(1) == '>')) {
4344 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4346 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004347 }
4348 SKIP(2);
4349
4350 /*
4351 * SAX: PI detected.
4352 */
4353 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4354 (ctxt->sax->processingInstruction != NULL))
4355 ctxt->sax->processingInstruction(ctxt->userData,
4356 target, NULL);
4357 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 return;
4359 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004360 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004361 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004362 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004363 ctxt->instate = state;
4364 return;
4365 }
4366 cur = CUR;
4367 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004368 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4369 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004370 }
4371 SKIP_BLANKS;
4372 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004373 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004374 ((cur != '?') || (NXT(1) != '>'))) {
4375 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004376 xmlChar *tmp;
4377
Owen Taylor3473f882001-02-23 17:55:21 +00004378 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004379 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4380 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004381 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004382 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004383 ctxt->instate = state;
4384 return;
4385 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004386 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004387 }
4388 count++;
4389 if (count > 50) {
4390 GROW;
4391 count = 0;
4392 }
4393 COPY_BUF(l,buf,len,cur);
4394 NEXTL(l);
4395 cur = CUR_CHAR(l);
4396 if (cur == 0) {
4397 SHRINK;
4398 GROW;
4399 cur = CUR_CHAR(l);
4400 }
4401 }
4402 buf[len] = 0;
4403 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004404 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4405 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004406 } else {
4407 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004408 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4409 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004410 }
4411 SKIP(2);
4412
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004413#ifdef LIBXML_CATALOG_ENABLED
4414 if (((state == XML_PARSER_MISC) ||
4415 (state == XML_PARSER_START)) &&
4416 (xmlStrEqual(target, XML_CATALOG_PI))) {
4417 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4418 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4419 (allow == XML_CATA_ALLOW_ALL))
4420 xmlParseCatalogPI(ctxt, buf);
4421 }
4422#endif
4423
4424
Owen Taylor3473f882001-02-23 17:55:21 +00004425 /*
4426 * SAX: PI detected.
4427 */
4428 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4429 (ctxt->sax->processingInstruction != NULL))
4430 ctxt->sax->processingInstruction(ctxt->userData,
4431 target, buf);
4432 }
4433 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004434 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004435 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004436 }
4437 ctxt->instate = state;
4438 }
4439}
4440
4441/**
4442 * xmlParseNotationDecl:
4443 * @ctxt: an XML parser context
4444 *
4445 * parse a notation declaration
4446 *
4447 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4448 *
4449 * Hence there is actually 3 choices:
4450 * 'PUBLIC' S PubidLiteral
4451 * 'PUBLIC' S PubidLiteral S SystemLiteral
4452 * and 'SYSTEM' S SystemLiteral
4453 *
4454 * See the NOTE on xmlParseExternalID().
4455 */
4456
4457void
4458xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004459 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004460 xmlChar *Pubid;
4461 xmlChar *Systemid;
4462
Daniel Veillarda07050d2003-10-19 14:46:32 +00004463 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004464 xmlParserInputPtr input = ctxt->input;
4465 SHRINK;
4466 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004467 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004468 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4469 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004470 return;
4471 }
4472 SKIP_BLANKS;
4473
Daniel Veillard76d66f42001-05-16 21:05:17 +00004474 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004475 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004476 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004477 return;
4478 }
William M. Brack76e95df2003-10-18 16:20:14 +00004479 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004481 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004482 return;
4483 }
4484 SKIP_BLANKS;
4485
4486 /*
4487 * Parse the IDs.
4488 */
4489 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4490 SKIP_BLANKS;
4491
4492 if (RAW == '>') {
4493 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4495 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004496 }
4497 NEXT;
4498 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4499 (ctxt->sax->notationDecl != NULL))
4500 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004502 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 }
Owen Taylor3473f882001-02-23 17:55:21 +00004504 if (Systemid != NULL) xmlFree(Systemid);
4505 if (Pubid != NULL) xmlFree(Pubid);
4506 }
4507}
4508
4509/**
4510 * xmlParseEntityDecl:
4511 * @ctxt: an XML parser context
4512 *
4513 * parse <!ENTITY declarations
4514 *
4515 * [70] EntityDecl ::= GEDecl | PEDecl
4516 *
4517 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4518 *
4519 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4520 *
4521 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4522 *
4523 * [74] PEDef ::= EntityValue | ExternalID
4524 *
4525 * [76] NDataDecl ::= S 'NDATA' S Name
4526 *
4527 * [ VC: Notation Declared ]
4528 * The Name must match the declared name of a notation.
4529 */
4530
4531void
4532xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004533 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004534 xmlChar *value = NULL;
4535 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004536 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004537 int isParameter = 0;
4538 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004539 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004540
Daniel Veillard4c778d82005-01-23 17:37:44 +00004541 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004542 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004543 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004544 SHRINK;
4545 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004546 skipped = SKIP_BLANKS;
4547 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4549 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004550 }
Owen Taylor3473f882001-02-23 17:55:21 +00004551
4552 if (RAW == '%') {
4553 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004554 skipped = SKIP_BLANKS;
4555 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004556 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4557 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004558 }
Owen Taylor3473f882001-02-23 17:55:21 +00004559 isParameter = 1;
4560 }
4561
Daniel Veillard76d66f42001-05-16 21:05:17 +00004562 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004563 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4565 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004566 return;
4567 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004568 skipped = SKIP_BLANKS;
4569 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004570 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4571 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004572 }
Owen Taylor3473f882001-02-23 17:55:21 +00004573
Daniel Veillardf5582f12002-06-11 10:08:16 +00004574 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004575 /*
4576 * handle the various case of definitions...
4577 */
4578 if (isParameter) {
4579 if ((RAW == '"') || (RAW == '\'')) {
4580 value = xmlParseEntityValue(ctxt, &orig);
4581 if (value) {
4582 if ((ctxt->sax != NULL) &&
4583 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4584 ctxt->sax->entityDecl(ctxt->userData, name,
4585 XML_INTERNAL_PARAMETER_ENTITY,
4586 NULL, NULL, value);
4587 }
4588 } else {
4589 URI = xmlParseExternalID(ctxt, &literal, 1);
4590 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004591 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004592 }
4593 if (URI) {
4594 xmlURIPtr uri;
4595
4596 uri = xmlParseURI((const char *) URI);
4597 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004598 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4599 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004600 /*
4601 * This really ought to be a well formedness error
4602 * but the XML Core WG decided otherwise c.f. issue
4603 * E26 of the XML erratas.
4604 */
Owen Taylor3473f882001-02-23 17:55:21 +00004605 } else {
4606 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004607 /*
4608 * Okay this is foolish to block those but not
4609 * invalid URIs.
4610 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004611 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004612 } else {
4613 if ((ctxt->sax != NULL) &&
4614 (!ctxt->disableSAX) &&
4615 (ctxt->sax->entityDecl != NULL))
4616 ctxt->sax->entityDecl(ctxt->userData, name,
4617 XML_EXTERNAL_PARAMETER_ENTITY,
4618 literal, URI, NULL);
4619 }
4620 xmlFreeURI(uri);
4621 }
4622 }
4623 }
4624 } else {
4625 if ((RAW == '"') || (RAW == '\'')) {
4626 value = xmlParseEntityValue(ctxt, &orig);
4627 if ((ctxt->sax != NULL) &&
4628 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4629 ctxt->sax->entityDecl(ctxt->userData, name,
4630 XML_INTERNAL_GENERAL_ENTITY,
4631 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004632 /*
4633 * For expat compatibility in SAX mode.
4634 */
4635 if ((ctxt->myDoc == NULL) ||
4636 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4637 if (ctxt->myDoc == NULL) {
4638 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4639 }
4640 if (ctxt->myDoc->intSubset == NULL)
4641 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4642 BAD_CAST "fake", NULL, NULL);
4643
Daniel Veillard1af9a412003-08-20 22:54:39 +00004644 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4645 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004646 }
Owen Taylor3473f882001-02-23 17:55:21 +00004647 } else {
4648 URI = xmlParseExternalID(ctxt, &literal, 1);
4649 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004650 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004651 }
4652 if (URI) {
4653 xmlURIPtr uri;
4654
4655 uri = xmlParseURI((const char *)URI);
4656 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004657 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4658 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004659 /*
4660 * This really ought to be a well formedness error
4661 * but the XML Core WG decided otherwise c.f. issue
4662 * E26 of the XML erratas.
4663 */
Owen Taylor3473f882001-02-23 17:55:21 +00004664 } else {
4665 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004666 /*
4667 * Okay this is foolish to block those but not
4668 * invalid URIs.
4669 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004670 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004671 }
4672 xmlFreeURI(uri);
4673 }
4674 }
William M. Brack76e95df2003-10-18 16:20:14 +00004675 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004676 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4677 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004678 }
4679 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004680 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004681 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004682 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4684 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004685 }
4686 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004687 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004688 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4689 (ctxt->sax->unparsedEntityDecl != NULL))
4690 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4691 literal, URI, ndata);
4692 } else {
4693 if ((ctxt->sax != NULL) &&
4694 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4695 ctxt->sax->entityDecl(ctxt->userData, name,
4696 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4697 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004698 /*
4699 * For expat compatibility in SAX mode.
4700 * assuming the entity repalcement was asked for
4701 */
4702 if ((ctxt->replaceEntities != 0) &&
4703 ((ctxt->myDoc == NULL) ||
4704 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4705 if (ctxt->myDoc == NULL) {
4706 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4707 }
4708
4709 if (ctxt->myDoc->intSubset == NULL)
4710 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4711 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004712 xmlSAX2EntityDecl(ctxt, name,
4713 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4714 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004715 }
Owen Taylor3473f882001-02-23 17:55:21 +00004716 }
4717 }
4718 }
4719 SKIP_BLANKS;
4720 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004721 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004722 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004723 } else {
4724 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004725 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4726 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004727 }
4728 NEXT;
4729 }
4730 if (orig != NULL) {
4731 /*
4732 * Ugly mechanism to save the raw entity value.
4733 */
4734 xmlEntityPtr cur = NULL;
4735
4736 if (isParameter) {
4737 if ((ctxt->sax != NULL) &&
4738 (ctxt->sax->getParameterEntity != NULL))
4739 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4740 } else {
4741 if ((ctxt->sax != NULL) &&
4742 (ctxt->sax->getEntity != NULL))
4743 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004744 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004745 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004746 }
Owen Taylor3473f882001-02-23 17:55:21 +00004747 }
4748 if (cur != NULL) {
4749 if (cur->orig != NULL)
4750 xmlFree(orig);
4751 else
4752 cur->orig = orig;
4753 } else
4754 xmlFree(orig);
4755 }
Owen Taylor3473f882001-02-23 17:55:21 +00004756 if (value != NULL) xmlFree(value);
4757 if (URI != NULL) xmlFree(URI);
4758 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004759 }
4760}
4761
4762/**
4763 * xmlParseDefaultDecl:
4764 * @ctxt: an XML parser context
4765 * @value: Receive a possible fixed default value for the attribute
4766 *
4767 * Parse an attribute default declaration
4768 *
4769 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4770 *
4771 * [ VC: Required Attribute ]
4772 * if the default declaration is the keyword #REQUIRED, then the
4773 * attribute must be specified for all elements of the type in the
4774 * attribute-list declaration.
4775 *
4776 * [ VC: Attribute Default Legal ]
4777 * The declared default value must meet the lexical constraints of
4778 * the declared attribute type c.f. xmlValidateAttributeDecl()
4779 *
4780 * [ VC: Fixed Attribute Default ]
4781 * if an attribute has a default value declared with the #FIXED
4782 * keyword, instances of that attribute must match the default value.
4783 *
4784 * [ WFC: No < in Attribute Values ]
4785 * handled in xmlParseAttValue()
4786 *
4787 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4788 * or XML_ATTRIBUTE_FIXED.
4789 */
4790
4791int
4792xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4793 int val;
4794 xmlChar *ret;
4795
4796 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004797 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004798 SKIP(9);
4799 return(XML_ATTRIBUTE_REQUIRED);
4800 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004801 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004802 SKIP(8);
4803 return(XML_ATTRIBUTE_IMPLIED);
4804 }
4805 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004806 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004807 SKIP(6);
4808 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004809 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004810 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4811 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004812 }
4813 SKIP_BLANKS;
4814 }
4815 ret = xmlParseAttValue(ctxt);
4816 ctxt->instate = XML_PARSER_DTD;
4817 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004818 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004819 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004820 } else
4821 *value = ret;
4822 return(val);
4823}
4824
4825/**
4826 * xmlParseNotationType:
4827 * @ctxt: an XML parser context
4828 *
4829 * parse an Notation attribute type.
4830 *
4831 * Note: the leading 'NOTATION' S part has already being parsed...
4832 *
4833 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4834 *
4835 * [ VC: Notation Attributes ]
4836 * Values of this type must match one of the notation names included
4837 * in the declaration; all notation names in the declaration must be declared.
4838 *
4839 * Returns: the notation attribute tree built while parsing
4840 */
4841
4842xmlEnumerationPtr
4843xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004844 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004845 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4846
4847 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004848 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004849 return(NULL);
4850 }
4851 SHRINK;
4852 do {
4853 NEXT;
4854 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004855 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004857 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4858 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004859 return(ret);
4860 }
4861 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004862 if (cur == NULL) return(ret);
4863 if (last == NULL) ret = last = cur;
4864 else {
4865 last->next = cur;
4866 last = cur;
4867 }
4868 SKIP_BLANKS;
4869 } while (RAW == '|');
4870 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004871 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004872 if ((last != NULL) && (last != ret))
4873 xmlFreeEnumeration(last);
4874 return(ret);
4875 }
4876 NEXT;
4877 return(ret);
4878}
4879
4880/**
4881 * xmlParseEnumerationType:
4882 * @ctxt: an XML parser context
4883 *
4884 * parse an Enumeration attribute type.
4885 *
4886 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4887 *
4888 * [ VC: Enumeration ]
4889 * Values of this type must match one of the Nmtoken tokens in
4890 * the declaration
4891 *
4892 * Returns: the enumeration attribute tree built while parsing
4893 */
4894
4895xmlEnumerationPtr
4896xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4897 xmlChar *name;
4898 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4899
4900 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004901 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004902 return(NULL);
4903 }
4904 SHRINK;
4905 do {
4906 NEXT;
4907 SKIP_BLANKS;
4908 name = xmlParseNmtoken(ctxt);
4909 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004910 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004911 return(ret);
4912 }
4913 cur = xmlCreateEnumeration(name);
4914 xmlFree(name);
4915 if (cur == NULL) return(ret);
4916 if (last == NULL) ret = last = cur;
4917 else {
4918 last->next = cur;
4919 last = cur;
4920 }
4921 SKIP_BLANKS;
4922 } while (RAW == '|');
4923 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004924 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004925 return(ret);
4926 }
4927 NEXT;
4928 return(ret);
4929}
4930
4931/**
4932 * xmlParseEnumeratedType:
4933 * @ctxt: an XML parser context
4934 * @tree: the enumeration tree built while parsing
4935 *
4936 * parse an Enumerated attribute type.
4937 *
4938 * [57] EnumeratedType ::= NotationType | Enumeration
4939 *
4940 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4941 *
4942 *
4943 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4944 */
4945
4946int
4947xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004948 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004949 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004950 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4952 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004953 return(0);
4954 }
4955 SKIP_BLANKS;
4956 *tree = xmlParseNotationType(ctxt);
4957 if (*tree == NULL) return(0);
4958 return(XML_ATTRIBUTE_NOTATION);
4959 }
4960 *tree = xmlParseEnumerationType(ctxt);
4961 if (*tree == NULL) return(0);
4962 return(XML_ATTRIBUTE_ENUMERATION);
4963}
4964
4965/**
4966 * xmlParseAttributeType:
4967 * @ctxt: an XML parser context
4968 * @tree: the enumeration tree built while parsing
4969 *
4970 * parse the Attribute list def for an element
4971 *
4972 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4973 *
4974 * [55] StringType ::= 'CDATA'
4975 *
4976 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4977 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4978 *
4979 * Validity constraints for attribute values syntax are checked in
4980 * xmlValidateAttributeValue()
4981 *
4982 * [ VC: ID ]
4983 * Values of type ID must match the Name production. A name must not
4984 * appear more than once in an XML document as a value of this type;
4985 * i.e., ID values must uniquely identify the elements which bear them.
4986 *
4987 * [ VC: One ID per Element Type ]
4988 * No element type may have more than one ID attribute specified.
4989 *
4990 * [ VC: ID Attribute Default ]
4991 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4992 *
4993 * [ VC: IDREF ]
4994 * Values of type IDREF must match the Name production, and values
4995 * of type IDREFS must match Names; each IDREF Name must match the value
4996 * of an ID attribute on some element in the XML document; i.e. IDREF
4997 * values must match the value of some ID attribute.
4998 *
4999 * [ VC: Entity Name ]
5000 * Values of type ENTITY must match the Name production, values
5001 * of type ENTITIES must match Names; each Entity Name must match the
5002 * name of an unparsed entity declared in the DTD.
5003 *
5004 * [ VC: Name Token ]
5005 * Values of type NMTOKEN must match the Nmtoken production; values
5006 * of type NMTOKENS must match Nmtokens.
5007 *
5008 * Returns the attribute type
5009 */
5010int
5011xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5012 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005013 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005014 SKIP(5);
5015 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005016 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005017 SKIP(6);
5018 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005019 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005020 SKIP(5);
5021 return(XML_ATTRIBUTE_IDREF);
5022 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5023 SKIP(2);
5024 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005025 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005026 SKIP(6);
5027 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005028 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005029 SKIP(8);
5030 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005031 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005032 SKIP(8);
5033 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005034 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005035 SKIP(7);
5036 return(XML_ATTRIBUTE_NMTOKEN);
5037 }
5038 return(xmlParseEnumeratedType(ctxt, tree));
5039}
5040
5041/**
5042 * xmlParseAttributeListDecl:
5043 * @ctxt: an XML parser context
5044 *
5045 * : parse the Attribute list def for an element
5046 *
5047 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5048 *
5049 * [53] AttDef ::= S Name S AttType S DefaultDecl
5050 *
5051 */
5052void
5053xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005054 const xmlChar *elemName;
5055 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005056 xmlEnumerationPtr tree;
5057
Daniel Veillarda07050d2003-10-19 14:46:32 +00005058 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005059 xmlParserInputPtr input = ctxt->input;
5060
5061 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005062 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005064 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005065 }
5066 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005067 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005068 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005069 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5070 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005071 return;
5072 }
5073 SKIP_BLANKS;
5074 GROW;
5075 while (RAW != '>') {
5076 const xmlChar *check = CUR_PTR;
5077 int type;
5078 int def;
5079 xmlChar *defaultValue = NULL;
5080
5081 GROW;
5082 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005083 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005084 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5086 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005087 break;
5088 }
5089 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005090 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005092 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005093 break;
5094 }
5095 SKIP_BLANKS;
5096
5097 type = xmlParseAttributeType(ctxt, &tree);
5098 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005099 break;
5100 }
5101
5102 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005103 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5105 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005106 if (tree != NULL)
5107 xmlFreeEnumeration(tree);
5108 break;
5109 }
5110 SKIP_BLANKS;
5111
5112 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5113 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005114 if (defaultValue != NULL)
5115 xmlFree(defaultValue);
5116 if (tree != NULL)
5117 xmlFreeEnumeration(tree);
5118 break;
5119 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005120 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5121 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005122
5123 GROW;
5124 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005125 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005126 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005127 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005128 if (defaultValue != NULL)
5129 xmlFree(defaultValue);
5130 if (tree != NULL)
5131 xmlFreeEnumeration(tree);
5132 break;
5133 }
5134 SKIP_BLANKS;
5135 }
5136 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005137 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5138 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005139 if (defaultValue != NULL)
5140 xmlFree(defaultValue);
5141 if (tree != NULL)
5142 xmlFreeEnumeration(tree);
5143 break;
5144 }
5145 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5146 (ctxt->sax->attributeDecl != NULL))
5147 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5148 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005149 else if (tree != NULL)
5150 xmlFreeEnumeration(tree);
5151
5152 if ((ctxt->sax2) && (defaultValue != NULL) &&
5153 (def != XML_ATTRIBUTE_IMPLIED) &&
5154 (def != XML_ATTRIBUTE_REQUIRED)) {
5155 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5156 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005157 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005158 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5159 }
Owen Taylor3473f882001-02-23 17:55:21 +00005160 if (defaultValue != NULL)
5161 xmlFree(defaultValue);
5162 GROW;
5163 }
5164 if (RAW == '>') {
5165 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005166 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5167 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005168 }
5169 NEXT;
5170 }
Owen Taylor3473f882001-02-23 17:55:21 +00005171 }
5172}
5173
5174/**
5175 * xmlParseElementMixedContentDecl:
5176 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005177 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005178 *
5179 * parse the declaration for a Mixed Element content
5180 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5181 *
5182 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5183 * '(' S? '#PCDATA' S? ')'
5184 *
5185 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5186 *
5187 * [ VC: No Duplicate Types ]
5188 * The same name must not appear more than once in a single
5189 * mixed-content declaration.
5190 *
5191 * returns: the list of the xmlElementContentPtr describing the element choices
5192 */
5193xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005194xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005195 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005196 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005197
5198 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005199 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005200 SKIP(7);
5201 SKIP_BLANKS;
5202 SHRINK;
5203 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005204 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005205 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5206"Element content declaration doesn't start and stop in the same entity\n",
5207 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005208 }
Owen Taylor3473f882001-02-23 17:55:21 +00005209 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005210 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005211 if (RAW == '*') {
5212 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5213 NEXT;
5214 }
5215 return(ret);
5216 }
5217 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005218 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 if (ret == NULL) return(NULL);
5220 }
5221 while (RAW == '|') {
5222 NEXT;
5223 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005224 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005225 if (ret == NULL) return(NULL);
5226 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005227 if (cur != NULL)
5228 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005229 cur = ret;
5230 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005231 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005232 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005233 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005234 if (n->c1 != NULL)
5235 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005236 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005237 if (n != NULL)
5238 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005240 }
5241 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005242 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005243 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005244 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005245 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005246 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 return(NULL);
5248 }
5249 SKIP_BLANKS;
5250 GROW;
5251 }
5252 if ((RAW == ')') && (NXT(1) == '*')) {
5253 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005255 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005256 if (cur->c2 != NULL)
5257 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005258 }
5259 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005260 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005261 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5262"Element content declaration doesn't start and stop in the same entity\n",
5263 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005264 }
Owen Taylor3473f882001-02-23 17:55:21 +00005265 SKIP(2);
5266 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005267 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005268 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005269 return(NULL);
5270 }
5271
5272 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005273 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005274 }
5275 return(ret);
5276}
5277
5278/**
5279 * xmlParseElementChildrenContentDecl:
5280 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005281 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005282 *
5283 * parse the declaration for an element's children content model
5284 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5285 *
5286 *
5287 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5288 *
5289 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5290 *
5291 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5292 *
5293 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5294 *
5295 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5296 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005297 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005298 * opening or closing parentheses in a choice, seq, or Mixed
5299 * construct is contained in the replacement text for a parameter
5300 * entity, both must be contained in the same replacement text. For
5301 * interoperability, if a parameter-entity reference appears in a
5302 * choice, seq, or Mixed construct, its replacement text should not
5303 * be empty, and neither the first nor last non-blank character of
5304 * the replacement text should be a connector (| or ,).
5305 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005306 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005307 * hierarchy.
5308 */
5309xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005310xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005311 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005312 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005313 xmlChar type = 0;
5314
5315 SKIP_BLANKS;
5316 GROW;
5317 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005318 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005319
Owen Taylor3473f882001-02-23 17:55:21 +00005320 /* Recurse on first child */
5321 NEXT;
5322 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005323 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005324 SKIP_BLANKS;
5325 GROW;
5326 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005327 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005328 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005329 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005330 return(NULL);
5331 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005332 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005333 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005334 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005335 return(NULL);
5336 }
Owen Taylor3473f882001-02-23 17:55:21 +00005337 GROW;
5338 if (RAW == '?') {
5339 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5340 NEXT;
5341 } else if (RAW == '*') {
5342 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5343 NEXT;
5344 } else if (RAW == '+') {
5345 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5346 NEXT;
5347 } else {
5348 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5349 }
Owen Taylor3473f882001-02-23 17:55:21 +00005350 GROW;
5351 }
5352 SKIP_BLANKS;
5353 SHRINK;
5354 while (RAW != ')') {
5355 /*
5356 * Each loop we parse one separator and one element.
5357 */
5358 if (RAW == ',') {
5359 if (type == 0) type = CUR;
5360
5361 /*
5362 * Detect "Name | Name , Name" error
5363 */
5364 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005365 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005366 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005368 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005369 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005370 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005371 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005372 return(NULL);
5373 }
5374 NEXT;
5375
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005376 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005377 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005378 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005379 xmlFreeDocElementContent(ctxt->myDoc, last);
5380 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005381 return(NULL);
5382 }
5383 if (last == NULL) {
5384 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005385 if (ret != NULL)
5386 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005387 ret = cur = op;
5388 } else {
5389 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005390 if (op != NULL)
5391 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005392 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005393 if (last != NULL)
5394 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005395 cur =op;
5396 last = NULL;
5397 }
5398 } else if (RAW == '|') {
5399 if (type == 0) type = CUR;
5400
5401 /*
5402 * Detect "Name , Name | Name" error
5403 */
5404 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005405 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005406 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005407 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005408 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005409 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005410 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005411 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 return(NULL);
5413 }
5414 NEXT;
5415
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005416 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005418 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005419 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005421 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005422 return(NULL);
5423 }
5424 if (last == NULL) {
5425 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005426 if (ret != NULL)
5427 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 ret = cur = op;
5429 } else {
5430 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005431 if (op != NULL)
5432 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005433 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005434 if (last != NULL)
5435 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 cur =op;
5437 last = NULL;
5438 }
5439 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005440 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005441 if ((last != NULL) && (last != ret))
5442 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005443 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005444 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005445 return(NULL);
5446 }
5447 GROW;
5448 SKIP_BLANKS;
5449 GROW;
5450 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005451 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005452 /* Recurse on second child */
5453 NEXT;
5454 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005455 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 SKIP_BLANKS;
5457 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005458 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005460 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005461 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005462 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005463 return(NULL);
5464 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005465 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005466 if (RAW == '?') {
5467 last->ocur = XML_ELEMENT_CONTENT_OPT;
5468 NEXT;
5469 } else if (RAW == '*') {
5470 last->ocur = XML_ELEMENT_CONTENT_MULT;
5471 NEXT;
5472 } else if (RAW == '+') {
5473 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5474 NEXT;
5475 } else {
5476 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5477 }
5478 }
5479 SKIP_BLANKS;
5480 GROW;
5481 }
5482 if ((cur != NULL) && (last != NULL)) {
5483 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005484 if (last != NULL)
5485 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005486 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005488 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5489"Element content declaration doesn't start and stop in the same entity\n",
5490 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005491 }
Owen Taylor3473f882001-02-23 17:55:21 +00005492 NEXT;
5493 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005494 if (ret != NULL) {
5495 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5496 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5497 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5498 else
5499 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5500 }
Owen Taylor3473f882001-02-23 17:55:21 +00005501 NEXT;
5502 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005503 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005504 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005505 cur = ret;
5506 /*
5507 * Some normalization:
5508 * (a | b* | c?)* == (a | b | c)*
5509 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005510 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005511 if ((cur->c1 != NULL) &&
5512 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5513 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5514 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5515 if ((cur->c2 != NULL) &&
5516 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5517 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5518 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5519 cur = cur->c2;
5520 }
5521 }
Owen Taylor3473f882001-02-23 17:55:21 +00005522 NEXT;
5523 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005524 if (ret != NULL) {
5525 int found = 0;
5526
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005527 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5528 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5529 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005530 else
5531 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005532 /*
5533 * Some normalization:
5534 * (a | b*)+ == (a | b)*
5535 * (a | b?)+ == (a | b)*
5536 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005537 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005538 if ((cur->c1 != NULL) &&
5539 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5540 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5541 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5542 found = 1;
5543 }
5544 if ((cur->c2 != NULL) &&
5545 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5546 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5547 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5548 found = 1;
5549 }
5550 cur = cur->c2;
5551 }
5552 if (found)
5553 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5554 }
Owen Taylor3473f882001-02-23 17:55:21 +00005555 NEXT;
5556 }
5557 return(ret);
5558}
5559
5560/**
5561 * xmlParseElementContentDecl:
5562 * @ctxt: an XML parser context
5563 * @name: the name of the element being defined.
5564 * @result: the Element Content pointer will be stored here if any
5565 *
5566 * parse the declaration for an Element content either Mixed or Children,
5567 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5568 *
5569 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5570 *
5571 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5572 */
5573
5574int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005575xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005576 xmlElementContentPtr *result) {
5577
5578 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005579 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005580 int res;
5581
5582 *result = NULL;
5583
5584 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005585 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005586 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005587 return(-1);
5588 }
5589 NEXT;
5590 GROW;
5591 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005592 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005593 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005594 res = XML_ELEMENT_TYPE_MIXED;
5595 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005596 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005597 res = XML_ELEMENT_TYPE_ELEMENT;
5598 }
Owen Taylor3473f882001-02-23 17:55:21 +00005599 SKIP_BLANKS;
5600 *result = tree;
5601 return(res);
5602}
5603
5604/**
5605 * xmlParseElementDecl:
5606 * @ctxt: an XML parser context
5607 *
5608 * parse an Element declaration.
5609 *
5610 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5611 *
5612 * [ VC: Unique Element Type Declaration ]
5613 * No element type may be declared more than once
5614 *
5615 * Returns the type of the element, or -1 in case of error
5616 */
5617int
5618xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005619 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005620 int ret = -1;
5621 xmlElementContentPtr content = NULL;
5622
Daniel Veillard4c778d82005-01-23 17:37:44 +00005623 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005624 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005625 xmlParserInputPtr input = ctxt->input;
5626
5627 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005628 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005629 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5630 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005631 }
5632 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005633 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005634 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5636 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005637 return(-1);
5638 }
5639 while ((RAW == 0) && (ctxt->inputNr > 1))
5640 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005641 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5643 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005644 }
5645 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005646 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005647 SKIP(5);
5648 /*
5649 * Element must always be empty.
5650 */
5651 ret = XML_ELEMENT_TYPE_EMPTY;
5652 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5653 (NXT(2) == 'Y')) {
5654 SKIP(3);
5655 /*
5656 * Element is a generic container.
5657 */
5658 ret = XML_ELEMENT_TYPE_ANY;
5659 } else if (RAW == '(') {
5660 ret = xmlParseElementContentDecl(ctxt, name, &content);
5661 } else {
5662 /*
5663 * [ WFC: PEs in Internal Subset ] error handling.
5664 */
5665 if ((RAW == '%') && (ctxt->external == 0) &&
5666 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005667 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005668 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005669 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005670 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005671 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5672 }
Owen Taylor3473f882001-02-23 17:55:21 +00005673 return(-1);
5674 }
5675
5676 SKIP_BLANKS;
5677 /*
5678 * Pop-up of finished entities.
5679 */
5680 while ((RAW == 0) && (ctxt->inputNr > 1))
5681 xmlPopInput(ctxt);
5682 SKIP_BLANKS;
5683
5684 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005685 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005686 if (content != NULL) {
5687 xmlFreeDocElementContent(ctxt->myDoc, content);
5688 }
Owen Taylor3473f882001-02-23 17:55:21 +00005689 } else {
5690 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005691 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5692 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005693 }
5694
5695 NEXT;
5696 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005697 (ctxt->sax->elementDecl != NULL)) {
5698 if (content != NULL)
5699 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005700 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5701 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005702 if ((content != NULL) && (content->parent == NULL)) {
5703 /*
5704 * this is a trick: if xmlAddElementDecl is called,
5705 * instead of copying the full tree it is plugged directly
5706 * if called from the parser. Avoid duplicating the
5707 * interfaces or change the API/ABI
5708 */
5709 xmlFreeDocElementContent(ctxt->myDoc, content);
5710 }
5711 } else if (content != NULL) {
5712 xmlFreeDocElementContent(ctxt->myDoc, content);
5713 }
Owen Taylor3473f882001-02-23 17:55:21 +00005714 }
Owen Taylor3473f882001-02-23 17:55:21 +00005715 }
5716 return(ret);
5717}
5718
5719/**
Owen Taylor3473f882001-02-23 17:55:21 +00005720 * xmlParseConditionalSections
5721 * @ctxt: an XML parser context
5722 *
5723 * [61] conditionalSect ::= includeSect | ignoreSect
5724 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5725 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5726 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5727 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5728 */
5729
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005730static void
Owen Taylor3473f882001-02-23 17:55:21 +00005731xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5732 SKIP(3);
5733 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005734 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005735 SKIP(7);
5736 SKIP_BLANKS;
5737 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005738 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 } else {
5740 NEXT;
5741 }
5742 if (xmlParserDebugEntities) {
5743 if ((ctxt->input != NULL) && (ctxt->input->filename))
5744 xmlGenericError(xmlGenericErrorContext,
5745 "%s(%d): ", ctxt->input->filename,
5746 ctxt->input->line);
5747 xmlGenericError(xmlGenericErrorContext,
5748 "Entering INCLUDE Conditional Section\n");
5749 }
5750
5751 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5752 (NXT(2) != '>'))) {
5753 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005754 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005755
5756 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5757 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005758 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005759 NEXT;
5760 } else if (RAW == '%') {
5761 xmlParsePEReference(ctxt);
5762 } else
5763 xmlParseMarkupDecl(ctxt);
5764
5765 /*
5766 * Pop-up of finished entities.
5767 */
5768 while ((RAW == 0) && (ctxt->inputNr > 1))
5769 xmlPopInput(ctxt);
5770
Daniel Veillardfdc91562002-07-01 21:52:03 +00005771 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005772 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005773 break;
5774 }
5775 }
5776 if (xmlParserDebugEntities) {
5777 if ((ctxt->input != NULL) && (ctxt->input->filename))
5778 xmlGenericError(xmlGenericErrorContext,
5779 "%s(%d): ", ctxt->input->filename,
5780 ctxt->input->line);
5781 xmlGenericError(xmlGenericErrorContext,
5782 "Leaving INCLUDE Conditional Section\n");
5783 }
5784
Daniel Veillarda07050d2003-10-19 14:46:32 +00005785 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005786 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005787 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005788 int depth = 0;
5789
5790 SKIP(6);
5791 SKIP_BLANKS;
5792 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005793 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005794 } else {
5795 NEXT;
5796 }
5797 if (xmlParserDebugEntities) {
5798 if ((ctxt->input != NULL) && (ctxt->input->filename))
5799 xmlGenericError(xmlGenericErrorContext,
5800 "%s(%d): ", ctxt->input->filename,
5801 ctxt->input->line);
5802 xmlGenericError(xmlGenericErrorContext,
5803 "Entering IGNORE Conditional Section\n");
5804 }
5805
5806 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005807 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005808 * But disable SAX event generating DTD building in the meantime
5809 */
5810 state = ctxt->disableSAX;
5811 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005812 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005813 ctxt->instate = XML_PARSER_IGNORE;
5814
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005815 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005816 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5817 depth++;
5818 SKIP(3);
5819 continue;
5820 }
5821 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5822 if (--depth >= 0) SKIP(3);
5823 continue;
5824 }
5825 NEXT;
5826 continue;
5827 }
5828
5829 ctxt->disableSAX = state;
5830 ctxt->instate = instate;
5831
5832 if (xmlParserDebugEntities) {
5833 if ((ctxt->input != NULL) && (ctxt->input->filename))
5834 xmlGenericError(xmlGenericErrorContext,
5835 "%s(%d): ", ctxt->input->filename,
5836 ctxt->input->line);
5837 xmlGenericError(xmlGenericErrorContext,
5838 "Leaving IGNORE Conditional Section\n");
5839 }
5840
5841 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005842 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 }
5844
5845 if (RAW == 0)
5846 SHRINK;
5847
5848 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005849 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005850 } else {
5851 SKIP(3);
5852 }
5853}
5854
5855/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005856 * xmlParseMarkupDecl:
5857 * @ctxt: an XML parser context
5858 *
5859 * parse Markup declarations
5860 *
5861 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5862 * NotationDecl | PI | Comment
5863 *
5864 * [ VC: Proper Declaration/PE Nesting ]
5865 * Parameter-entity replacement text must be properly nested with
5866 * markup declarations. That is to say, if either the first character
5867 * or the last character of a markup declaration (markupdecl above) is
5868 * contained in the replacement text for a parameter-entity reference,
5869 * both must be contained in the same replacement text.
5870 *
5871 * [ WFC: PEs in Internal Subset ]
5872 * In the internal DTD subset, parameter-entity references can occur
5873 * only where markup declarations can occur, not within markup declarations.
5874 * (This does not apply to references that occur in external parameter
5875 * entities or to the external subset.)
5876 */
5877void
5878xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5879 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005880 if (CUR == '<') {
5881 if (NXT(1) == '!') {
5882 switch (NXT(2)) {
5883 case 'E':
5884 if (NXT(3) == 'L')
5885 xmlParseElementDecl(ctxt);
5886 else if (NXT(3) == 'N')
5887 xmlParseEntityDecl(ctxt);
5888 break;
5889 case 'A':
5890 xmlParseAttributeListDecl(ctxt);
5891 break;
5892 case 'N':
5893 xmlParseNotationDecl(ctxt);
5894 break;
5895 case '-':
5896 xmlParseComment(ctxt);
5897 break;
5898 default:
5899 /* there is an error but it will be detected later */
5900 break;
5901 }
5902 } else if (NXT(1) == '?') {
5903 xmlParsePI(ctxt);
5904 }
5905 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005906 /*
5907 * This is only for internal subset. On external entities,
5908 * the replacement is done before parsing stage
5909 */
5910 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5911 xmlParsePEReference(ctxt);
5912
5913 /*
5914 * Conditional sections are allowed from entities included
5915 * by PE References in the internal subset.
5916 */
5917 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5918 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5919 xmlParseConditionalSections(ctxt);
5920 }
5921 }
5922
5923 ctxt->instate = XML_PARSER_DTD;
5924}
5925
5926/**
5927 * xmlParseTextDecl:
5928 * @ctxt: an XML parser context
5929 *
5930 * parse an XML declaration header for external entities
5931 *
5932 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5933 *
5934 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5935 */
5936
5937void
5938xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5939 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005940 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005941
5942 /*
5943 * We know that '<?xml' is here.
5944 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005945 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005946 SKIP(5);
5947 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005948 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005949 return;
5950 }
5951
William M. Brack76e95df2003-10-18 16:20:14 +00005952 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5954 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005955 }
5956 SKIP_BLANKS;
5957
5958 /*
5959 * We may have the VersionInfo here.
5960 */
5961 version = xmlParseVersionInfo(ctxt);
5962 if (version == NULL)
5963 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005964 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005965 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005966 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5967 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005968 }
5969 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005970 ctxt->input->version = version;
5971
5972 /*
5973 * We must have the encoding declaration
5974 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005975 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005976 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5977 /*
5978 * The XML REC instructs us to stop parsing right here
5979 */
5980 return;
5981 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005982 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5983 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5984 "Missing encoding in text declaration\n");
5985 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005986
5987 SKIP_BLANKS;
5988 if ((RAW == '?') && (NXT(1) == '>')) {
5989 SKIP(2);
5990 } else if (RAW == '>') {
5991 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005992 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005993 NEXT;
5994 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005995 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005996 MOVETO_ENDTAG(CUR_PTR);
5997 NEXT;
5998 }
5999}
6000
6001/**
Owen Taylor3473f882001-02-23 17:55:21 +00006002 * xmlParseExternalSubset:
6003 * @ctxt: an XML parser context
6004 * @ExternalID: the external identifier
6005 * @SystemID: the system identifier (or URL)
6006 *
6007 * parse Markup declarations from an external subset
6008 *
6009 * [30] extSubset ::= textDecl? extSubsetDecl
6010 *
6011 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6012 */
6013void
6014xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6015 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006016 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006017 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006018 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006019 xmlParseTextDecl(ctxt);
6020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6021 /*
6022 * The XML REC instructs us to stop parsing right here
6023 */
6024 ctxt->instate = XML_PARSER_EOF;
6025 return;
6026 }
6027 }
6028 if (ctxt->myDoc == NULL) {
6029 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6030 }
6031 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6032 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6033
6034 ctxt->instate = XML_PARSER_DTD;
6035 ctxt->external = 1;
6036 while (((RAW == '<') && (NXT(1) == '?')) ||
6037 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006038 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006039 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006040 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006041
6042 GROW;
6043 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6044 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006045 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006046 NEXT;
6047 } else if (RAW == '%') {
6048 xmlParsePEReference(ctxt);
6049 } else
6050 xmlParseMarkupDecl(ctxt);
6051
6052 /*
6053 * Pop-up of finished entities.
6054 */
6055 while ((RAW == 0) && (ctxt->inputNr > 1))
6056 xmlPopInput(ctxt);
6057
Daniel Veillardfdc91562002-07-01 21:52:03 +00006058 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006059 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006060 break;
6061 }
6062 }
6063
6064 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006065 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006066 }
6067
6068}
6069
6070/**
6071 * xmlParseReference:
6072 * @ctxt: an XML parser context
6073 *
6074 * parse and handle entity references in content, depending on the SAX
6075 * interface, this may end-up in a call to character() if this is a
6076 * CharRef, a predefined entity, if there is no reference() callback.
6077 * or if the parser was asked to switch to that mode.
6078 *
6079 * [67] Reference ::= EntityRef | CharRef
6080 */
6081void
6082xmlParseReference(xmlParserCtxtPtr ctxt) {
6083 xmlEntityPtr ent;
6084 xmlChar *val;
6085 if (RAW != '&') return;
6086
6087 if (NXT(1) == '#') {
6088 int i = 0;
6089 xmlChar out[10];
6090 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006091 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006092
6093 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6094 /*
6095 * So we are using non-UTF-8 buffers
6096 * Check that the char fit on 8bits, if not
6097 * generate a CharRef.
6098 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006099 if (value <= 0xFF) {
6100 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006101 out[1] = 0;
6102 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6103 (!ctxt->disableSAX))
6104 ctxt->sax->characters(ctxt->userData, out, 1);
6105 } else {
6106 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006107 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006108 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006109 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6111 (!ctxt->disableSAX))
6112 ctxt->sax->reference(ctxt->userData, out);
6113 }
6114 } else {
6115 /*
6116 * Just encode the value in UTF-8
6117 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006118 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006119 out[i] = 0;
6120 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6121 (!ctxt->disableSAX))
6122 ctxt->sax->characters(ctxt->userData, out, i);
6123 }
6124 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006125 int was_checked;
6126
Owen Taylor3473f882001-02-23 17:55:21 +00006127 ent = xmlParseEntityRef(ctxt);
6128 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006129 if (!ctxt->wellFormed)
6130 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006131 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006132 if ((ent->name != NULL) &&
6133 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6134 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006135 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006136
6137
6138 /*
6139 * The first reference to the entity trigger a parsing phase
6140 * where the ent->children is filled with the result from
6141 * the parsing.
6142 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006143 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006144 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006145
Owen Taylor3473f882001-02-23 17:55:21 +00006146 value = ent->content;
6147
6148 /*
6149 * Check that this entity is well formed
6150 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006151 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006152 (value[1] == 0) && (value[0] == '<') &&
6153 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6154 /*
6155 * DONE: get definite answer on this !!!
6156 * Lots of entity decls are used to declare a single
6157 * char
6158 * <!ENTITY lt "<">
6159 * Which seems to be valid since
6160 * 2.4: The ampersand character (&) and the left angle
6161 * bracket (<) may appear in their literal form only
6162 * when used ... They are also legal within the literal
6163 * entity value of an internal entity declaration;i
6164 * see "4.3.2 Well-Formed Parsed Entities".
6165 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6166 * Looking at the OASIS test suite and James Clark
6167 * tests, this is broken. However the XML REC uses
6168 * it. Is the XML REC not well-formed ????
6169 * This is a hack to avoid this problem
6170 *
6171 * ANSWER: since lt gt amp .. are already defined,
6172 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006173 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006174 * is lousy but acceptable.
6175 */
6176 list = xmlNewDocText(ctxt->myDoc, value);
6177 if (list != NULL) {
6178 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6179 (ent->children == NULL)) {
6180 ent->children = list;
6181 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006182 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006183 list->parent = (xmlNodePtr) ent;
6184 } else {
6185 xmlFreeNodeList(list);
6186 }
6187 } else if (list != NULL) {
6188 xmlFreeNodeList(list);
6189 }
6190 } else {
6191 /*
6192 * 4.3.2: An internal general parsed entity is well-formed
6193 * if its replacement text matches the production labeled
6194 * content.
6195 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006196
6197 void *user_data;
6198 /*
6199 * This is a bit hackish but this seems the best
6200 * way to make sure both SAX and DOM entity support
6201 * behaves okay.
6202 */
6203 if (ctxt->userData == ctxt)
6204 user_data = NULL;
6205 else
6206 user_data = ctxt->userData;
6207
Owen Taylor3473f882001-02-23 17:55:21 +00006208 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6209 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006210 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6211 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006212 ctxt->depth--;
6213 } else if (ent->etype ==
6214 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6215 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006216 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006217 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006218 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006219 ctxt->depth--;
6220 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006221 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006222 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6223 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006224 }
6225 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006226 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006227 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006228 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006229 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6230 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006231 (ent->children == NULL)) {
6232 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006233 if (ctxt->replaceEntities) {
6234 /*
6235 * Prune it directly in the generated document
6236 * except for single text nodes.
6237 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006238 if (((list->type == XML_TEXT_NODE) &&
6239 (list->next == NULL)) ||
6240 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006241 list->parent = (xmlNodePtr) ent;
6242 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006243 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006244 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006245 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006246 while (list != NULL) {
6247 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006248 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006249 if (list->next == NULL)
6250 ent->last = list;
6251 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006252 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006253 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006254#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006255 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6256 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006257#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006258 }
6259 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006260 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006261 while (list != NULL) {
6262 list->parent = (xmlNodePtr) ent;
6263 if (list->next == NULL)
6264 ent->last = list;
6265 list = list->next;
6266 }
Owen Taylor3473f882001-02-23 17:55:21 +00006267 }
6268 } else {
6269 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006270 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006272 } else if ((ret != XML_ERR_OK) &&
6273 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006274 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6275 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006276 } else if (list != NULL) {
6277 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006278 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006279 }
6280 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006281 ent->checked = 1;
6282 }
6283
6284 if (ent->children == NULL) {
6285 /*
6286 * Probably running in SAX mode and the callbacks don't
6287 * build the entity content. So unless we already went
6288 * though parsing for first checking go though the entity
6289 * content to generate callbacks associated to the entity
6290 */
6291 if (was_checked == 1) {
6292 void *user_data;
6293 /*
6294 * This is a bit hackish but this seems the best
6295 * way to make sure both SAX and DOM entity support
6296 * behaves okay.
6297 */
6298 if (ctxt->userData == ctxt)
6299 user_data = NULL;
6300 else
6301 user_data = ctxt->userData;
6302
6303 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6304 ctxt->depth++;
6305 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6306 ent->content, user_data, NULL);
6307 ctxt->depth--;
6308 } else if (ent->etype ==
6309 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6310 ctxt->depth++;
6311 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6312 ctxt->sax, user_data, ctxt->depth,
6313 ent->URI, ent->ExternalID, NULL);
6314 ctxt->depth--;
6315 } else {
6316 ret = XML_ERR_ENTITY_PE_INTERNAL;
6317 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6318 "invalid entity type found\n", NULL);
6319 }
6320 if (ret == XML_ERR_ENTITY_LOOP) {
6321 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6322 return;
6323 }
6324 }
6325 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6326 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6327 /*
6328 * Entity reference callback comes second, it's somewhat
6329 * superfluous but a compatibility to historical behaviour
6330 */
6331 ctxt->sax->reference(ctxt->userData, ent->name);
6332 }
6333 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006334 }
6335 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006336 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006337 /*
6338 * Create a node.
6339 */
6340 ctxt->sax->reference(ctxt->userData, ent->name);
6341 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006342 }
6343 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006344 /*
6345 * There is a problem on the handling of _private for entities
6346 * (bug 155816): Should we copy the content of the field from
6347 * the entity (possibly overwriting some value set by the user
6348 * when a copy is created), should we leave it alone, or should
6349 * we try to take care of different situations? The problem
6350 * is exacerbated by the usage of this field by the xmlReader.
6351 * To fix this bug, we look at _private on the created node
6352 * and, if it's NULL, we copy in whatever was in the entity.
6353 * If it's not NULL we leave it alone. This is somewhat of a
6354 * hack - maybe we should have further tests to determine
6355 * what to do.
6356 */
Owen Taylor3473f882001-02-23 17:55:21 +00006357 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6358 /*
6359 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006360 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006361 * In the first occurrence list contains the replacement.
6362 * progressive == 2 means we are operating on the Reader
6363 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006364 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006365 if (((list == NULL) && (ent->owner == 0)) ||
6366 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006367 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006368
6369 /*
6370 * when operating on a reader, the entities definitions
6371 * are always owning the entities subtree.
6372 if (ctxt->parseMode == XML_PARSE_READER)
6373 ent->owner = 1;
6374 */
6375
Daniel Veillard62f313b2001-07-04 19:49:14 +00006376 cur = ent->children;
6377 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006378 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006379 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006380 if (nw->_private == NULL)
6381 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006382 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006383 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006384 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006385 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006386 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006387 if (cur == ent->last) {
6388 /*
6389 * needed to detect some strange empty
6390 * node cases in the reader tests
6391 */
6392 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006393 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006394 (nw->type == XML_ELEMENT_NODE) &&
6395 (nw->children == NULL))
6396 nw->extra = 1;
6397
Daniel Veillard62f313b2001-07-04 19:49:14 +00006398 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006399 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006400 cur = cur->next;
6401 }
Daniel Veillard81273902003-09-30 00:43:48 +00006402#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006403 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006404 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006405#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006406 } else if (list == NULL) {
6407 xmlNodePtr nw = NULL, cur, next, last,
6408 firstChild = NULL;
6409 /*
6410 * Copy the entity child list and make it the new
6411 * entity child list. The goal is to make sure any
6412 * ID or REF referenced will be the one from the
6413 * document content and not the entity copy.
6414 */
6415 cur = ent->children;
6416 ent->children = NULL;
6417 last = ent->last;
6418 ent->last = NULL;
6419 while (cur != NULL) {
6420 next = cur->next;
6421 cur->next = NULL;
6422 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006423 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006424 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006425 if (nw->_private == NULL)
6426 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006427 if (firstChild == NULL){
6428 firstChild = cur;
6429 }
6430 xmlAddChild((xmlNodePtr) ent, nw);
6431 xmlAddChild(ctxt->node, cur);
6432 }
6433 if (cur == last)
6434 break;
6435 cur = next;
6436 }
6437 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006438#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006439 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6440 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006441#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006442 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006443 const xmlChar *nbktext;
6444
Daniel Veillard62f313b2001-07-04 19:49:14 +00006445 /*
6446 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006447 * node with a possible previous text one which
6448 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006449 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006450 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6451 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006452 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006453 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006454 if ((ent->last != ent->children) &&
6455 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006456 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006457 xmlAddChildList(ctxt->node, ent->children);
6458 }
6459
Owen Taylor3473f882001-02-23 17:55:21 +00006460 /*
6461 * This is to avoid a nasty side effect, see
6462 * characters() in SAX.c
6463 */
6464 ctxt->nodemem = 0;
6465 ctxt->nodelen = 0;
6466 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006467 }
6468 }
6469 } else {
6470 val = ent->content;
6471 if (val == NULL) return;
6472 /*
6473 * inline the entity.
6474 */
6475 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6476 (!ctxt->disableSAX))
6477 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6478 }
6479 }
6480}
6481
6482/**
6483 * xmlParseEntityRef:
6484 * @ctxt: an XML parser context
6485 *
6486 * parse ENTITY references declarations
6487 *
6488 * [68] EntityRef ::= '&' Name ';'
6489 *
6490 * [ WFC: Entity Declared ]
6491 * In a document without any DTD, a document with only an internal DTD
6492 * subset which contains no parameter entity references, or a document
6493 * with "standalone='yes'", the Name given in the entity reference
6494 * must match that in an entity declaration, except that well-formed
6495 * documents need not declare any of the following entities: amp, lt,
6496 * gt, apos, quot. The declaration of a parameter entity must precede
6497 * any reference to it. Similarly, the declaration of a general entity
6498 * must precede any reference to it which appears in a default value in an
6499 * attribute-list declaration. Note that if entities are declared in the
6500 * external subset or in external parameter entities, a non-validating
6501 * processor is not obligated to read and process their declarations;
6502 * for such documents, the rule that an entity must be declared is a
6503 * well-formedness constraint only if standalone='yes'.
6504 *
6505 * [ WFC: Parsed Entity ]
6506 * An entity reference must not contain the name of an unparsed entity
6507 *
6508 * Returns the xmlEntityPtr if found, or NULL otherwise.
6509 */
6510xmlEntityPtr
6511xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006512 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006513 xmlEntityPtr ent = NULL;
6514
6515 GROW;
6516
6517 if (RAW == '&') {
6518 NEXT;
6519 name = xmlParseName(ctxt);
6520 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006521 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6522 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006523 } else {
6524 if (RAW == ';') {
6525 NEXT;
6526 /*
6527 * Ask first SAX for entity resolution, otherwise try the
6528 * predefined set.
6529 */
6530 if (ctxt->sax != NULL) {
6531 if (ctxt->sax->getEntity != NULL)
6532 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006533 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006534 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006535 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6536 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006537 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006538 }
Owen Taylor3473f882001-02-23 17:55:21 +00006539 }
6540 /*
6541 * [ WFC: Entity Declared ]
6542 * In a document without any DTD, a document with only an
6543 * internal DTD subset which contains no parameter entity
6544 * references, or a document with "standalone='yes'", the
6545 * Name given in the entity reference must match that in an
6546 * entity declaration, except that well-formed documents
6547 * need not declare any of the following entities: amp, lt,
6548 * gt, apos, quot.
6549 * The declaration of a parameter entity must precede any
6550 * reference to it.
6551 * Similarly, the declaration of a general entity must
6552 * precede any reference to it which appears in a default
6553 * value in an attribute-list declaration. Note that if
6554 * entities are declared in the external subset or in
6555 * external parameter entities, a non-validating processor
6556 * is not obligated to read and process their declarations;
6557 * for such documents, the rule that an entity must be
6558 * declared is a well-formedness constraint only if
6559 * standalone='yes'.
6560 */
6561 if (ent == NULL) {
6562 if ((ctxt->standalone == 1) ||
6563 ((ctxt->hasExternalSubset == 0) &&
6564 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006565 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006566 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006567 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006568 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006569 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006570 if ((ctxt->inSubset == 0) &&
6571 (ctxt->sax != NULL) &&
6572 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006573 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006576 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006577 }
6578
6579 /*
6580 * [ WFC: Parsed Entity ]
6581 * An entity reference must not contain the name of an
6582 * unparsed entity
6583 */
6584 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006586 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006587 }
6588
6589 /*
6590 * [ WFC: No External Entity References ]
6591 * Attribute values cannot contain direct or indirect
6592 * entity references to external entities.
6593 */
6594 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6595 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006596 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6597 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006598 }
6599 /*
6600 * [ WFC: No < in Attribute Values ]
6601 * The replacement text of any entity referred to directly or
6602 * indirectly in an attribute value (other than "&lt;") must
6603 * not contain a <.
6604 */
6605 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6606 (ent != NULL) &&
6607 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6608 (ent->content != NULL) &&
6609 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006610 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006611 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006612 }
6613
6614 /*
6615 * Internal check, no parameter entities here ...
6616 */
6617 else {
6618 switch (ent->etype) {
6619 case XML_INTERNAL_PARAMETER_ENTITY:
6620 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006621 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6622 "Attempt to reference the parameter entity '%s'\n",
6623 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006624 break;
6625 default:
6626 break;
6627 }
6628 }
6629
6630 /*
6631 * [ WFC: No Recursion ]
6632 * A parsed entity must not contain a recursive reference
6633 * to itself, either directly or indirectly.
6634 * Done somewhere else
6635 */
6636
6637 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006638 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006639 }
Owen Taylor3473f882001-02-23 17:55:21 +00006640 }
6641 }
6642 return(ent);
6643}
6644
6645/**
6646 * xmlParseStringEntityRef:
6647 * @ctxt: an XML parser context
6648 * @str: a pointer to an index in the string
6649 *
6650 * parse ENTITY references declarations, but this version parses it from
6651 * a string value.
6652 *
6653 * [68] EntityRef ::= '&' Name ';'
6654 *
6655 * [ WFC: Entity Declared ]
6656 * In a document without any DTD, a document with only an internal DTD
6657 * subset which contains no parameter entity references, or a document
6658 * with "standalone='yes'", the Name given in the entity reference
6659 * must match that in an entity declaration, except that well-formed
6660 * documents need not declare any of the following entities: amp, lt,
6661 * gt, apos, quot. The declaration of a parameter entity must precede
6662 * any reference to it. Similarly, the declaration of a general entity
6663 * must precede any reference to it which appears in a default value in an
6664 * attribute-list declaration. Note that if entities are declared in the
6665 * external subset or in external parameter entities, a non-validating
6666 * processor is not obligated to read and process their declarations;
6667 * for such documents, the rule that an entity must be declared is a
6668 * well-formedness constraint only if standalone='yes'.
6669 *
6670 * [ WFC: Parsed Entity ]
6671 * An entity reference must not contain the name of an unparsed entity
6672 *
6673 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6674 * is updated to the current location in the string.
6675 */
6676xmlEntityPtr
6677xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6678 xmlChar *name;
6679 const xmlChar *ptr;
6680 xmlChar cur;
6681 xmlEntityPtr ent = NULL;
6682
6683 if ((str == NULL) || (*str == NULL))
6684 return(NULL);
6685 ptr = *str;
6686 cur = *ptr;
6687 if (cur == '&') {
6688 ptr++;
6689 cur = *ptr;
6690 name = xmlParseStringName(ctxt, &ptr);
6691 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006692 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6693 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006694 } else {
6695 if (*ptr == ';') {
6696 ptr++;
6697 /*
6698 * Ask first SAX for entity resolution, otherwise try the
6699 * predefined set.
6700 */
6701 if (ctxt->sax != NULL) {
6702 if (ctxt->sax->getEntity != NULL)
6703 ent = ctxt->sax->getEntity(ctxt->userData, name);
6704 if (ent == NULL)
6705 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006706 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006707 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006708 }
Owen Taylor3473f882001-02-23 17:55:21 +00006709 }
6710 /*
6711 * [ WFC: Entity Declared ]
6712 * In a document without any DTD, a document with only an
6713 * internal DTD subset which contains no parameter entity
6714 * references, or a document with "standalone='yes'", the
6715 * Name given in the entity reference must match that in an
6716 * entity declaration, except that well-formed documents
6717 * need not declare any of the following entities: amp, lt,
6718 * gt, apos, quot.
6719 * The declaration of a parameter entity must precede any
6720 * reference to it.
6721 * Similarly, the declaration of a general entity must
6722 * precede any reference to it which appears in a default
6723 * value in an attribute-list declaration. Note that if
6724 * entities are declared in the external subset or in
6725 * external parameter entities, a non-validating processor
6726 * is not obligated to read and process their declarations;
6727 * for such documents, the rule that an entity must be
6728 * declared is a well-formedness constraint only if
6729 * standalone='yes'.
6730 */
6731 if (ent == NULL) {
6732 if ((ctxt->standalone == 1) ||
6733 ((ctxt->hasExternalSubset == 0) &&
6734 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006735 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006736 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006737 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006738 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006739 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006740 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006741 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006742 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006743 }
6744
6745 /*
6746 * [ WFC: Parsed Entity ]
6747 * An entity reference must not contain the name of an
6748 * unparsed entity
6749 */
6750 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006751 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006752 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006753 }
6754
6755 /*
6756 * [ WFC: No External Entity References ]
6757 * Attribute values cannot contain direct or indirect
6758 * entity references to external entities.
6759 */
6760 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6761 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006762 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006763 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006764 }
6765 /*
6766 * [ WFC: No < in Attribute Values ]
6767 * The replacement text of any entity referred to directly or
6768 * indirectly in an attribute value (other than "&lt;") must
6769 * not contain a <.
6770 */
6771 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6772 (ent != NULL) &&
6773 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6774 (ent->content != NULL) &&
6775 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006776 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6777 "'<' in entity '%s' is not allowed in attributes values\n",
6778 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006779 }
6780
6781 /*
6782 * Internal check, no parameter entities here ...
6783 */
6784 else {
6785 switch (ent->etype) {
6786 case XML_INTERNAL_PARAMETER_ENTITY:
6787 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006788 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6789 "Attempt to reference the parameter entity '%s'\n",
6790 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006791 break;
6792 default:
6793 break;
6794 }
6795 }
6796
6797 /*
6798 * [ WFC: No Recursion ]
6799 * A parsed entity must not contain a recursive reference
6800 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006801 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006802 */
6803
6804 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006805 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006806 }
6807 xmlFree(name);
6808 }
6809 }
6810 *str = ptr;
6811 return(ent);
6812}
6813
6814/**
6815 * xmlParsePEReference:
6816 * @ctxt: an XML parser context
6817 *
6818 * parse PEReference declarations
6819 * The entity content is handled directly by pushing it's content as
6820 * a new input stream.
6821 *
6822 * [69] PEReference ::= '%' Name ';'
6823 *
6824 * [ WFC: No Recursion ]
6825 * A parsed entity must not contain a recursive
6826 * reference to itself, either directly or indirectly.
6827 *
6828 * [ WFC: Entity Declared ]
6829 * In a document without any DTD, a document with only an internal DTD
6830 * subset which contains no parameter entity references, or a document
6831 * with "standalone='yes'", ... ... The declaration of a parameter
6832 * entity must precede any reference to it...
6833 *
6834 * [ VC: Entity Declared ]
6835 * In a document with an external subset or external parameter entities
6836 * with "standalone='no'", ... ... The declaration of a parameter entity
6837 * must precede any reference to it...
6838 *
6839 * [ WFC: In DTD ]
6840 * Parameter-entity references may only appear in the DTD.
6841 * NOTE: misleading but this is handled.
6842 */
6843void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006844xmlParsePEReference(xmlParserCtxtPtr ctxt)
6845{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006846 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006847 xmlEntityPtr entity = NULL;
6848 xmlParserInputPtr input;
6849
6850 if (RAW == '%') {
6851 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006852 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006853 if (name == NULL) {
6854 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6855 "xmlParsePEReference: no name\n");
6856 } else {
6857 if (RAW == ';') {
6858 NEXT;
6859 if ((ctxt->sax != NULL) &&
6860 (ctxt->sax->getParameterEntity != NULL))
6861 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6862 name);
6863 if (entity == NULL) {
6864 /*
6865 * [ WFC: Entity Declared ]
6866 * In a document without any DTD, a document with only an
6867 * internal DTD subset which contains no parameter entity
6868 * references, or a document with "standalone='yes'", ...
6869 * ... The declaration of a parameter entity must precede
6870 * any reference to it...
6871 */
6872 if ((ctxt->standalone == 1) ||
6873 ((ctxt->hasExternalSubset == 0) &&
6874 (ctxt->hasPErefs == 0))) {
6875 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6876 "PEReference: %%%s; not found\n",
6877 name);
6878 } else {
6879 /*
6880 * [ VC: Entity Declared ]
6881 * In a document with an external subset or external
6882 * parameter entities with "standalone='no'", ...
6883 * ... The declaration of a parameter entity must
6884 * precede any reference to it...
6885 */
6886 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6887 "PEReference: %%%s; not found\n",
6888 name, NULL);
6889 ctxt->valid = 0;
6890 }
6891 } else {
6892 /*
6893 * Internal checking in case the entity quest barfed
6894 */
6895 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6896 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6897 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6898 "Internal: %%%s; is not a parameter entity\n",
6899 name, NULL);
6900 } else if (ctxt->input->free != deallocblankswrapper) {
6901 input =
6902 xmlNewBlanksWrapperInputStream(ctxt, entity);
6903 xmlPushInput(ctxt, input);
6904 } else {
6905 /*
6906 * TODO !!!
6907 * handle the extra spaces added before and after
6908 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6909 */
6910 input = xmlNewEntityInputStream(ctxt, entity);
6911 xmlPushInput(ctxt, input);
6912 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006913 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006914 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006915 xmlParseTextDecl(ctxt);
6916 if (ctxt->errNo ==
6917 XML_ERR_UNSUPPORTED_ENCODING) {
6918 /*
6919 * The XML REC instructs us to stop parsing
6920 * right here
6921 */
6922 ctxt->instate = XML_PARSER_EOF;
6923 return;
6924 }
6925 }
6926 }
6927 }
6928 ctxt->hasPErefs = 1;
6929 } else {
6930 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6931 }
6932 }
Owen Taylor3473f882001-02-23 17:55:21 +00006933 }
6934}
6935
6936/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00006937 * xmlLoadEntityContent:
6938 * @ctxt: an XML parser context
6939 * @entity: an unloaded system entity
6940 *
6941 * Load the original content of the given system entity from the
6942 * ExternalID/SystemID given. This is to be used for Included in Literal
6943 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
6944 *
6945 * Returns 0 in case of success and -1 in case of failure
6946 */
6947static int
6948xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
6949 xmlParserInputPtr input;
6950 xmlBufferPtr buf;
6951 int l, c;
6952 int count = 0;
6953
6954 if ((ctxt == NULL) || (entity == NULL) ||
6955 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
6956 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
6957 (entity->content != NULL)) {
6958 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6959 "xmlLoadEntityContent parameter error");
6960 return(-1);
6961 }
6962
6963 if (xmlParserDebugEntities)
6964 xmlGenericError(xmlGenericErrorContext,
6965 "Reading %s entity content input\n", entity->name);
6966
6967 buf = xmlBufferCreate();
6968 if (buf == NULL) {
6969 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6970 "xmlLoadEntityContent parameter error");
6971 return(-1);
6972 }
6973
6974 input = xmlNewEntityInputStream(ctxt, entity);
6975 if (input == NULL) {
6976 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6977 "xmlLoadEntityContent input error");
6978 xmlBufferFree(buf);
6979 return(-1);
6980 }
6981
6982 /*
6983 * Push the entity as the current input, read char by char
6984 * saving to the buffer until the end of the entity or an error
6985 */
6986 xmlPushInput(ctxt, input);
6987 GROW;
6988 c = CUR_CHAR(l);
6989 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
6990 (IS_CHAR(c))) {
6991 xmlBufferAdd(buf, ctxt->input->cur, l);
6992 if (count++ > 100) {
6993 count = 0;
6994 GROW;
6995 }
6996 NEXTL(l);
6997 c = CUR_CHAR(l);
6998 }
6999
7000 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7001 xmlPopInput(ctxt);
7002 } else if (!IS_CHAR(c)) {
7003 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7004 "xmlLoadEntityContent: invalid char value %d\n",
7005 c);
7006 xmlBufferFree(buf);
7007 return(-1);
7008 }
7009 entity->content = buf->content;
7010 buf->content = NULL;
7011 xmlBufferFree(buf);
7012
7013 return(0);
7014}
7015
7016/**
Owen Taylor3473f882001-02-23 17:55:21 +00007017 * xmlParseStringPEReference:
7018 * @ctxt: an XML parser context
7019 * @str: a pointer to an index in the string
7020 *
7021 * parse PEReference declarations
7022 *
7023 * [69] PEReference ::= '%' Name ';'
7024 *
7025 * [ WFC: No Recursion ]
7026 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007027 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007028 *
7029 * [ WFC: Entity Declared ]
7030 * In a document without any DTD, a document with only an internal DTD
7031 * subset which contains no parameter entity references, or a document
7032 * with "standalone='yes'", ... ... The declaration of a parameter
7033 * entity must precede any reference to it...
7034 *
7035 * [ VC: Entity Declared ]
7036 * In a document with an external subset or external parameter entities
7037 * with "standalone='no'", ... ... The declaration of a parameter entity
7038 * must precede any reference to it...
7039 *
7040 * [ WFC: In DTD ]
7041 * Parameter-entity references may only appear in the DTD.
7042 * NOTE: misleading but this is handled.
7043 *
7044 * Returns the string of the entity content.
7045 * str is updated to the current value of the index
7046 */
7047xmlEntityPtr
7048xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7049 const xmlChar *ptr;
7050 xmlChar cur;
7051 xmlChar *name;
7052 xmlEntityPtr entity = NULL;
7053
7054 if ((str == NULL) || (*str == NULL)) return(NULL);
7055 ptr = *str;
7056 cur = *ptr;
7057 if (cur == '%') {
7058 ptr++;
7059 cur = *ptr;
7060 name = xmlParseStringName(ctxt, &ptr);
7061 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007062 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7063 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007064 } else {
7065 cur = *ptr;
7066 if (cur == ';') {
7067 ptr++;
7068 cur = *ptr;
7069 if ((ctxt->sax != NULL) &&
7070 (ctxt->sax->getParameterEntity != NULL))
7071 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7072 name);
7073 if (entity == NULL) {
7074 /*
7075 * [ WFC: Entity Declared ]
7076 * In a document without any DTD, a document with only an
7077 * internal DTD subset which contains no parameter entity
7078 * references, or a document with "standalone='yes'", ...
7079 * ... The declaration of a parameter entity must precede
7080 * any reference to it...
7081 */
7082 if ((ctxt->standalone == 1) ||
7083 ((ctxt->hasExternalSubset == 0) &&
7084 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007085 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007086 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007087 } else {
7088 /*
7089 * [ VC: Entity Declared ]
7090 * In a document with an external subset or external
7091 * parameter entities with "standalone='no'", ...
7092 * ... The declaration of a parameter entity must
7093 * precede any reference to it...
7094 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007095 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7096 "PEReference: %%%s; not found\n",
7097 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007098 ctxt->valid = 0;
7099 }
7100 } else {
7101 /*
7102 * Internal checking in case the entity quest barfed
7103 */
7104 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7105 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007106 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7107 "%%%s; is not a parameter entity\n",
7108 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007109 }
7110 }
7111 ctxt->hasPErefs = 1;
7112 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007113 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007114 }
7115 xmlFree(name);
7116 }
7117 }
7118 *str = ptr;
7119 return(entity);
7120}
7121
7122/**
7123 * xmlParseDocTypeDecl:
7124 * @ctxt: an XML parser context
7125 *
7126 * parse a DOCTYPE declaration
7127 *
7128 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7129 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7130 *
7131 * [ VC: Root Element Type ]
7132 * The Name in the document type declaration must match the element
7133 * type of the root element.
7134 */
7135
7136void
7137xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007138 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007139 xmlChar *ExternalID = NULL;
7140 xmlChar *URI = NULL;
7141
7142 /*
7143 * We know that '<!DOCTYPE' has been detected.
7144 */
7145 SKIP(9);
7146
7147 SKIP_BLANKS;
7148
7149 /*
7150 * Parse the DOCTYPE name.
7151 */
7152 name = xmlParseName(ctxt);
7153 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007154 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7155 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007156 }
7157 ctxt->intSubName = name;
7158
7159 SKIP_BLANKS;
7160
7161 /*
7162 * Check for SystemID and ExternalID
7163 */
7164 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7165
7166 if ((URI != NULL) || (ExternalID != NULL)) {
7167 ctxt->hasExternalSubset = 1;
7168 }
7169 ctxt->extSubURI = URI;
7170 ctxt->extSubSystem = ExternalID;
7171
7172 SKIP_BLANKS;
7173
7174 /*
7175 * Create and update the internal subset.
7176 */
7177 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7178 (!ctxt->disableSAX))
7179 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7180
7181 /*
7182 * Is there any internal subset declarations ?
7183 * they are handled separately in xmlParseInternalSubset()
7184 */
7185 if (RAW == '[')
7186 return;
7187
7188 /*
7189 * We should be at the end of the DOCTYPE declaration.
7190 */
7191 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007192 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007193 }
7194 NEXT;
7195}
7196
7197/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007198 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007199 * @ctxt: an XML parser context
7200 *
7201 * parse the internal subset declaration
7202 *
7203 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7204 */
7205
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007206static void
Owen Taylor3473f882001-02-23 17:55:21 +00007207xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7208 /*
7209 * Is there any DTD definition ?
7210 */
7211 if (RAW == '[') {
7212 ctxt->instate = XML_PARSER_DTD;
7213 NEXT;
7214 /*
7215 * Parse the succession of Markup declarations and
7216 * PEReferences.
7217 * Subsequence (markupdecl | PEReference | S)*
7218 */
7219 while (RAW != ']') {
7220 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007221 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007222
7223 SKIP_BLANKS;
7224 xmlParseMarkupDecl(ctxt);
7225 xmlParsePEReference(ctxt);
7226
7227 /*
7228 * Pop-up of finished entities.
7229 */
7230 while ((RAW == 0) && (ctxt->inputNr > 1))
7231 xmlPopInput(ctxt);
7232
7233 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007234 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007235 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007236 break;
7237 }
7238 }
7239 if (RAW == ']') {
7240 NEXT;
7241 SKIP_BLANKS;
7242 }
7243 }
7244
7245 /*
7246 * We should be at the end of the DOCTYPE declaration.
7247 */
7248 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007249 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007250 }
7251 NEXT;
7252}
7253
Daniel Veillard81273902003-09-30 00:43:48 +00007254#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007255/**
7256 * xmlParseAttribute:
7257 * @ctxt: an XML parser context
7258 * @value: a xmlChar ** used to store the value of the attribute
7259 *
7260 * parse an attribute
7261 *
7262 * [41] Attribute ::= Name Eq AttValue
7263 *
7264 * [ WFC: No External Entity References ]
7265 * Attribute values cannot contain direct or indirect entity references
7266 * to external entities.
7267 *
7268 * [ WFC: No < in Attribute Values ]
7269 * The replacement text of any entity referred to directly or indirectly in
7270 * an attribute value (other than "&lt;") must not contain a <.
7271 *
7272 * [ VC: Attribute Value Type ]
7273 * The attribute must have been declared; the value must be of the type
7274 * declared for it.
7275 *
7276 * [25] Eq ::= S? '=' S?
7277 *
7278 * With namespace:
7279 *
7280 * [NS 11] Attribute ::= QName Eq AttValue
7281 *
7282 * Also the case QName == xmlns:??? is handled independently as a namespace
7283 * definition.
7284 *
7285 * Returns the attribute name, and the value in *value.
7286 */
7287
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007288const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007289xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007290 const xmlChar *name;
7291 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007292
7293 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007294 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007295 name = xmlParseName(ctxt);
7296 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007297 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007298 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007299 return(NULL);
7300 }
7301
7302 /*
7303 * read the value
7304 */
7305 SKIP_BLANKS;
7306 if (RAW == '=') {
7307 NEXT;
7308 SKIP_BLANKS;
7309 val = xmlParseAttValue(ctxt);
7310 ctxt->instate = XML_PARSER_CONTENT;
7311 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007312 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007313 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007314 return(NULL);
7315 }
7316
7317 /*
7318 * Check that xml:lang conforms to the specification
7319 * No more registered as an error, just generate a warning now
7320 * since this was deprecated in XML second edition
7321 */
7322 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7323 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007324 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7325 "Malformed value for xml:lang : %s\n",
7326 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007327 }
7328 }
7329
7330 /*
7331 * Check that xml:space conforms to the specification
7332 */
7333 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7334 if (xmlStrEqual(val, BAD_CAST "default"))
7335 *(ctxt->space) = 0;
7336 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7337 *(ctxt->space) = 1;
7338 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007339 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007340"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007341 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007342 }
7343 }
7344
7345 *value = val;
7346 return(name);
7347}
7348
7349/**
7350 * xmlParseStartTag:
7351 * @ctxt: an XML parser context
7352 *
7353 * parse a start of tag either for rule element or
7354 * EmptyElement. In both case we don't parse the tag closing chars.
7355 *
7356 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7357 *
7358 * [ WFC: Unique Att Spec ]
7359 * No attribute name may appear more than once in the same start-tag or
7360 * empty-element tag.
7361 *
7362 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7363 *
7364 * [ WFC: Unique Att Spec ]
7365 * No attribute name may appear more than once in the same start-tag or
7366 * empty-element tag.
7367 *
7368 * With namespace:
7369 *
7370 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7371 *
7372 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7373 *
7374 * Returns the element name parsed
7375 */
7376
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007377const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007378xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007379 const xmlChar *name;
7380 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007381 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007382 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007383 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007384 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007385 int i;
7386
7387 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007388 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007389
7390 name = xmlParseName(ctxt);
7391 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007392 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007393 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007394 return(NULL);
7395 }
7396
7397 /*
7398 * Now parse the attributes, it ends up with the ending
7399 *
7400 * (S Attribute)* S?
7401 */
7402 SKIP_BLANKS;
7403 GROW;
7404
Daniel Veillard21a0f912001-02-25 19:54:14 +00007405 while ((RAW != '>') &&
7406 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007407 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007408 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007409 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007410
7411 attname = xmlParseAttribute(ctxt, &attvalue);
7412 if ((attname != NULL) && (attvalue != NULL)) {
7413 /*
7414 * [ WFC: Unique Att Spec ]
7415 * No attribute name may appear more than once in the same
7416 * start-tag or empty-element tag.
7417 */
7418 for (i = 0; i < nbatts;i += 2) {
7419 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007420 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007421 xmlFree(attvalue);
7422 goto failed;
7423 }
7424 }
Owen Taylor3473f882001-02-23 17:55:21 +00007425 /*
7426 * Add the pair to atts
7427 */
7428 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007429 maxatts = 22; /* allow for 10 attrs by default */
7430 atts = (const xmlChar **)
7431 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007432 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007433 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007434 if (attvalue != NULL)
7435 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007436 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007437 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007438 ctxt->atts = atts;
7439 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007440 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007441 const xmlChar **n;
7442
Owen Taylor3473f882001-02-23 17:55:21 +00007443 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007444 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007445 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007446 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007447 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007448 if (attvalue != NULL)
7449 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007450 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007451 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007452 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007453 ctxt->atts = atts;
7454 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007455 }
7456 atts[nbatts++] = attname;
7457 atts[nbatts++] = attvalue;
7458 atts[nbatts] = NULL;
7459 atts[nbatts + 1] = NULL;
7460 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007461 if (attvalue != NULL)
7462 xmlFree(attvalue);
7463 }
7464
7465failed:
7466
Daniel Veillard3772de32002-12-17 10:31:45 +00007467 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007468 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7469 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007470 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007471 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7472 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007473 }
7474 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007475 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7476 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007477 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7478 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007479 break;
7480 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007481 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007482 GROW;
7483 }
7484
7485 /*
7486 * SAX: Start of Element !
7487 */
7488 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007489 (!ctxt->disableSAX)) {
7490 if (nbatts > 0)
7491 ctxt->sax->startElement(ctxt->userData, name, atts);
7492 else
7493 ctxt->sax->startElement(ctxt->userData, name, NULL);
7494 }
Owen Taylor3473f882001-02-23 17:55:21 +00007495
7496 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007497 /* Free only the content strings */
7498 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007499 if (atts[i] != NULL)
7500 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007501 }
7502 return(name);
7503}
7504
7505/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007507 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007508 * @line: line of the start tag
7509 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007510 *
7511 * parse an end of tag
7512 *
7513 * [42] ETag ::= '</' Name S? '>'
7514 *
7515 * With namespace
7516 *
7517 * [NS 9] ETag ::= '</' QName S? '>'
7518 */
7519
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007520static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007522 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007523
7524 GROW;
7525 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007526 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007527 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007528 return;
7529 }
7530 SKIP(2);
7531
Daniel Veillard46de64e2002-05-29 08:21:33 +00007532 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007533
7534 /*
7535 * We should definitely be at the ending "S? '>'" part
7536 */
7537 GROW;
7538 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007539 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007540 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007541 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007542 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007543
7544 /*
7545 * [ WFC: Element Type Match ]
7546 * The Name in an element's end-tag must match the element type in the
7547 * start-tag.
7548 *
7549 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007550 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007551 if (name == NULL) name = BAD_CAST "unparseable";
7552 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007553 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007554 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007555 }
7556
7557 /*
7558 * SAX: End of Tag
7559 */
7560 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7561 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007562 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007563
Daniel Veillarde57ec792003-09-10 10:50:59 +00007564 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007565 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007566 return;
7567}
7568
7569/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007570 * xmlParseEndTag:
7571 * @ctxt: an XML parser context
7572 *
7573 * parse an end of tag
7574 *
7575 * [42] ETag ::= '</' Name S? '>'
7576 *
7577 * With namespace
7578 *
7579 * [NS 9] ETag ::= '</' QName S? '>'
7580 */
7581
7582void
7583xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007584 xmlParseEndTag1(ctxt, 0);
7585}
Daniel Veillard81273902003-09-30 00:43:48 +00007586#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007587
7588/************************************************************************
7589 * *
7590 * SAX 2 specific operations *
7591 * *
7592 ************************************************************************/
7593
7594static const xmlChar *
7595xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7596 int len = 0, l;
7597 int c;
7598 int count = 0;
7599
7600 /*
7601 * Handler for more complex cases
7602 */
7603 GROW;
7604 c = CUR_CHAR(l);
7605 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007606 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007607 return(NULL);
7608 }
7609
7610 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007611 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007612 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007613 (IS_COMBINING(c)) ||
7614 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007615 if (count++ > 100) {
7616 count = 0;
7617 GROW;
7618 }
7619 len += l;
7620 NEXTL(l);
7621 c = CUR_CHAR(l);
7622 }
7623 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7624}
7625
7626/*
7627 * xmlGetNamespace:
7628 * @ctxt: an XML parser context
7629 * @prefix: the prefix to lookup
7630 *
7631 * Lookup the namespace name for the @prefix (which ca be NULL)
7632 * The prefix must come from the @ctxt->dict dictionnary
7633 *
7634 * Returns the namespace name or NULL if not bound
7635 */
7636static const xmlChar *
7637xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7638 int i;
7639
Daniel Veillarde57ec792003-09-10 10:50:59 +00007640 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007641 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007642 if (ctxt->nsTab[i] == prefix) {
7643 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7644 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007645 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007646 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007647 return(NULL);
7648}
7649
7650/**
7651 * xmlParseNCName:
7652 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007653 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007654 *
7655 * parse an XML name.
7656 *
7657 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7658 * CombiningChar | Extender
7659 *
7660 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7661 *
7662 * Returns the Name parsed or NULL
7663 */
7664
7665static const xmlChar *
7666xmlParseNCName(xmlParserCtxtPtr ctxt) {
7667 const xmlChar *in;
7668 const xmlChar *ret;
7669 int count = 0;
7670
7671 /*
7672 * Accelerator for simple ASCII names
7673 */
7674 in = ctxt->input->cur;
7675 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7676 ((*in >= 0x41) && (*in <= 0x5A)) ||
7677 (*in == '_')) {
7678 in++;
7679 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7680 ((*in >= 0x41) && (*in <= 0x5A)) ||
7681 ((*in >= 0x30) && (*in <= 0x39)) ||
7682 (*in == '_') || (*in == '-') ||
7683 (*in == '.'))
7684 in++;
7685 if ((*in > 0) && (*in < 0x80)) {
7686 count = in - ctxt->input->cur;
7687 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7688 ctxt->input->cur = in;
7689 ctxt->nbChars += count;
7690 ctxt->input->col += count;
7691 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007692 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 }
7694 return(ret);
7695 }
7696 }
7697 return(xmlParseNCNameComplex(ctxt));
7698}
7699
7700/**
7701 * xmlParseQName:
7702 * @ctxt: an XML parser context
7703 * @prefix: pointer to store the prefix part
7704 *
7705 * parse an XML Namespace QName
7706 *
7707 * [6] QName ::= (Prefix ':')? LocalPart
7708 * [7] Prefix ::= NCName
7709 * [8] LocalPart ::= NCName
7710 *
7711 * Returns the Name parsed or NULL
7712 */
7713
7714static const xmlChar *
7715xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7716 const xmlChar *l, *p;
7717
7718 GROW;
7719
7720 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007721 if (l == NULL) {
7722 if (CUR == ':') {
7723 l = xmlParseName(ctxt);
7724 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007725 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7726 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007727 *prefix = NULL;
7728 return(l);
7729 }
7730 }
7731 return(NULL);
7732 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733 if (CUR == ':') {
7734 NEXT;
7735 p = l;
7736 l = xmlParseNCName(ctxt);
7737 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007738 xmlChar *tmp;
7739
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007740 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7741 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007742 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7743 p = xmlDictLookup(ctxt->dict, tmp, -1);
7744 if (tmp != NULL) xmlFree(tmp);
7745 *prefix = NULL;
7746 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007747 }
7748 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007749 xmlChar *tmp;
7750
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007751 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7752 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007753 NEXT;
7754 tmp = (xmlChar *) xmlParseName(ctxt);
7755 if (tmp != NULL) {
7756 tmp = xmlBuildQName(tmp, l, NULL, 0);
7757 l = xmlDictLookup(ctxt->dict, tmp, -1);
7758 if (tmp != NULL) xmlFree(tmp);
7759 *prefix = p;
7760 return(l);
7761 }
7762 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7763 l = xmlDictLookup(ctxt->dict, tmp, -1);
7764 if (tmp != NULL) xmlFree(tmp);
7765 *prefix = p;
7766 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007767 }
7768 *prefix = p;
7769 } else
7770 *prefix = NULL;
7771 return(l);
7772}
7773
7774/**
7775 * xmlParseQNameAndCompare:
7776 * @ctxt: an XML parser context
7777 * @name: the localname
7778 * @prefix: the prefix, if any.
7779 *
7780 * parse an XML name and compares for match
7781 * (specialized for endtag parsing)
7782 *
7783 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7784 * and the name for mismatch
7785 */
7786
7787static const xmlChar *
7788xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7789 xmlChar const *prefix) {
7790 const xmlChar *cmp = name;
7791 const xmlChar *in;
7792 const xmlChar *ret;
7793 const xmlChar *prefix2;
7794
7795 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7796
7797 GROW;
7798 in = ctxt->input->cur;
7799
7800 cmp = prefix;
7801 while (*in != 0 && *in == *cmp) {
7802 ++in;
7803 ++cmp;
7804 }
7805 if ((*cmp == 0) && (*in == ':')) {
7806 in++;
7807 cmp = name;
7808 while (*in != 0 && *in == *cmp) {
7809 ++in;
7810 ++cmp;
7811 }
William M. Brack76e95df2003-10-18 16:20:14 +00007812 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007813 /* success */
7814 ctxt->input->cur = in;
7815 return((const xmlChar*) 1);
7816 }
7817 }
7818 /*
7819 * all strings coms from the dictionary, equality can be done directly
7820 */
7821 ret = xmlParseQName (ctxt, &prefix2);
7822 if ((ret == name) && (prefix == prefix2))
7823 return((const xmlChar*) 1);
7824 return ret;
7825}
7826
7827/**
7828 * xmlParseAttValueInternal:
7829 * @ctxt: an XML parser context
7830 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007831 * @alloc: whether the attribute was reallocated as a new string
7832 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007833 *
7834 * parse a value for an attribute.
7835 * NOTE: if no normalization is needed, the routine will return pointers
7836 * directly from the data buffer.
7837 *
7838 * 3.3.3 Attribute-Value Normalization:
7839 * Before the value of an attribute is passed to the application or
7840 * checked for validity, the XML processor must normalize it as follows:
7841 * - a character reference is processed by appending the referenced
7842 * character to the attribute value
7843 * - an entity reference is processed by recursively processing the
7844 * replacement text of the entity
7845 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7846 * appending #x20 to the normalized value, except that only a single
7847 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7848 * parsed entity or the literal entity value of an internal parsed entity
7849 * - other characters are processed by appending them to the normalized value
7850 * If the declared value is not CDATA, then the XML processor must further
7851 * process the normalized attribute value by discarding any leading and
7852 * trailing space (#x20) characters, and by replacing sequences of space
7853 * (#x20) characters by a single space (#x20) character.
7854 * All attributes for which no declaration has been read should be treated
7855 * by a non-validating parser as if declared CDATA.
7856 *
7857 * Returns the AttValue parsed or NULL. The value has to be freed by the
7858 * caller if it was copied, this can be detected by val[*len] == 0.
7859 */
7860
7861static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007862xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7863 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007864{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007865 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007866 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007867 xmlChar *ret = NULL;
7868
7869 GROW;
7870 in = (xmlChar *) CUR_PTR;
7871 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007872 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007873 return (NULL);
7874 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007875 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007876
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007877 /*
7878 * try to handle in this routine the most common case where no
7879 * allocation of a new string is required and where content is
7880 * pure ASCII.
7881 */
7882 limit = *in++;
7883 end = ctxt->input->end;
7884 start = in;
7885 if (in >= end) {
7886 const xmlChar *oldbase = ctxt->input->base;
7887 GROW;
7888 if (oldbase != ctxt->input->base) {
7889 long delta = ctxt->input->base - oldbase;
7890 start = start + delta;
7891 in = in + delta;
7892 }
7893 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007894 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007895 if (normalize) {
7896 /*
7897 * Skip any leading spaces
7898 */
7899 while ((in < end) && (*in != limit) &&
7900 ((*in == 0x20) || (*in == 0x9) ||
7901 (*in == 0xA) || (*in == 0xD))) {
7902 in++;
7903 start = in;
7904 if (in >= end) {
7905 const xmlChar *oldbase = ctxt->input->base;
7906 GROW;
7907 if (oldbase != ctxt->input->base) {
7908 long delta = ctxt->input->base - oldbase;
7909 start = start + delta;
7910 in = in + delta;
7911 }
7912 end = ctxt->input->end;
7913 }
7914 }
7915 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7916 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7917 if ((*in++ == 0x20) && (*in == 0x20)) break;
7918 if (in >= end) {
7919 const xmlChar *oldbase = ctxt->input->base;
7920 GROW;
7921 if (oldbase != ctxt->input->base) {
7922 long delta = ctxt->input->base - oldbase;
7923 start = start + delta;
7924 in = in + delta;
7925 }
7926 end = ctxt->input->end;
7927 }
7928 }
7929 last = in;
7930 /*
7931 * skip the trailing blanks
7932 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007933 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007934 while ((in < end) && (*in != limit) &&
7935 ((*in == 0x20) || (*in == 0x9) ||
7936 (*in == 0xA) || (*in == 0xD))) {
7937 in++;
7938 if (in >= end) {
7939 const xmlChar *oldbase = ctxt->input->base;
7940 GROW;
7941 if (oldbase != ctxt->input->base) {
7942 long delta = ctxt->input->base - oldbase;
7943 start = start + delta;
7944 in = in + delta;
7945 last = last + delta;
7946 }
7947 end = ctxt->input->end;
7948 }
7949 }
7950 if (*in != limit) goto need_complex;
7951 } else {
7952 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7953 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7954 in++;
7955 if (in >= end) {
7956 const xmlChar *oldbase = ctxt->input->base;
7957 GROW;
7958 if (oldbase != ctxt->input->base) {
7959 long delta = ctxt->input->base - oldbase;
7960 start = start + delta;
7961 in = in + delta;
7962 }
7963 end = ctxt->input->end;
7964 }
7965 }
7966 last = in;
7967 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007969 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007971 *len = last - start;
7972 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007973 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007974 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007975 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007976 }
7977 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007978 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007979 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007980need_complex:
7981 if (alloc) *alloc = 1;
7982 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983}
7984
7985/**
7986 * xmlParseAttribute2:
7987 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007988 * @pref: the element prefix
7989 * @elem: the element name
7990 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007991 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007992 * @len: an int * to save the length of the attribute
7993 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007994 *
7995 * parse an attribute in the new SAX2 framework.
7996 *
7997 * Returns the attribute name, and the value in *value, .
7998 */
7999
8000static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008001xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008002 const xmlChar * pref, const xmlChar * elem,
8003 const xmlChar ** prefix, xmlChar ** value,
8004 int *len, int *alloc)
8005{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008006 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008007 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008008 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008009
8010 *value = NULL;
8011 GROW;
8012 name = xmlParseQName(ctxt, prefix);
8013 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008014 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8015 "error parsing attribute name\n");
8016 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017 }
8018
8019 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008020 * get the type if needed
8021 */
8022 if (ctxt->attsSpecial != NULL) {
8023 int type;
8024
8025 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008026 pref, elem, *prefix, name);
8027 if (type != 0)
8028 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008029 }
8030
8031 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008032 * read the value
8033 */
8034 SKIP_BLANKS;
8035 if (RAW == '=') {
8036 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008037 SKIP_BLANKS;
8038 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8039 if (normalize) {
8040 /*
8041 * Sometimes a second normalisation pass for spaces is needed
8042 * but that only happens if charrefs or entities refernces
8043 * have been used in the attribute value, i.e. the attribute
8044 * value have been extracted in an allocated string already.
8045 */
8046 if (*alloc) {
8047 const xmlChar *val2;
8048
8049 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8050 if (val2 != NULL) {
8051 xmlFree(val);
8052 val = val2;
8053 }
8054 }
8055 }
8056 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008057 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008058 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8059 "Specification mandate value for attribute %s\n",
8060 name);
8061 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008062 }
8063
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008064 if (*prefix == ctxt->str_xml) {
8065 /*
8066 * Check that xml:lang conforms to the specification
8067 * No more registered as an error, just generate a warning now
8068 * since this was deprecated in XML second edition
8069 */
8070 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8071 internal_val = xmlStrndup(val, *len);
8072 if (!xmlCheckLanguageID(internal_val)) {
8073 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8074 "Malformed value for xml:lang : %s\n",
8075 internal_val, NULL);
8076 }
8077 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008079 /*
8080 * Check that xml:space conforms to the specification
8081 */
8082 if (xmlStrEqual(name, BAD_CAST "space")) {
8083 internal_val = xmlStrndup(val, *len);
8084 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8085 *(ctxt->space) = 0;
8086 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8087 *(ctxt->space) = 1;
8088 else {
8089 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8090 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8091 internal_val, NULL);
8092 }
8093 }
8094 if (internal_val) {
8095 xmlFree(internal_val);
8096 }
8097 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008098
8099 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008100 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008101}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008102/**
8103 * xmlParseStartTag2:
8104 * @ctxt: an XML parser context
8105 *
8106 * parse a start of tag either for rule element or
8107 * EmptyElement. In both case we don't parse the tag closing chars.
8108 * This routine is called when running SAX2 parsing
8109 *
8110 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8111 *
8112 * [ WFC: Unique Att Spec ]
8113 * No attribute name may appear more than once in the same start-tag or
8114 * empty-element tag.
8115 *
8116 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8117 *
8118 * [ WFC: Unique Att Spec ]
8119 * No attribute name may appear more than once in the same start-tag or
8120 * empty-element tag.
8121 *
8122 * With namespace:
8123 *
8124 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8125 *
8126 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8127 *
8128 * Returns the element name parsed
8129 */
8130
8131static const xmlChar *
8132xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008133 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008134 const xmlChar *localname;
8135 const xmlChar *prefix;
8136 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008137 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008138 const xmlChar *nsname;
8139 xmlChar *attvalue;
8140 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008141 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008142 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008143 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008144 const xmlChar *base;
8145 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008146 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008147
8148 if (RAW != '<') return(NULL);
8149 NEXT1;
8150
8151 /*
8152 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8153 * point since the attribute values may be stored as pointers to
8154 * the buffer and calling SHRINK would destroy them !
8155 * The Shrinking is only possible once the full set of attribute
8156 * callbacks have been done.
8157 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008158reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008159 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008160 base = ctxt->input->base;
8161 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008162 oldline = ctxt->input->line;
8163 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008164 nbatts = 0;
8165 nratts = 0;
8166 nbdef = 0;
8167 nbNs = 0;
8168 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008169 /* Forget any namespaces added during an earlier parse of this element. */
8170 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008171
8172 localname = xmlParseQName(ctxt, &prefix);
8173 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008174 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8175 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008176 return(NULL);
8177 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008178 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179
8180 /*
8181 * Now parse the attributes, it ends up with the ending
8182 *
8183 * (S Attribute)* S?
8184 */
8185 SKIP_BLANKS;
8186 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008187 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008188
8189 while ((RAW != '>') &&
8190 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008191 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008192 const xmlChar *q = CUR_PTR;
8193 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008194 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008195
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008196 attname = xmlParseAttribute2(ctxt, prefix, localname,
8197 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008198 if (ctxt->input->base != base) {
8199 if ((attvalue != NULL) && (alloc != 0))
8200 xmlFree(attvalue);
8201 attvalue = NULL;
8202 goto base_changed;
8203 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008204 if ((attname != NULL) && (attvalue != NULL)) {
8205 if (len < 0) len = xmlStrlen(attvalue);
8206 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008207 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8208 xmlURIPtr uri;
8209
8210 if (*URL != 0) {
8211 uri = xmlParseURI((const char *) URL);
8212 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008213 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8214 "xmlns: %s not a valid URI\n",
8215 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008216 } else {
8217 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008218 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8219 "xmlns: URI %s is not absolute\n",
8220 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008221 }
8222 xmlFreeURI(uri);
8223 }
8224 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008225 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008226 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008227 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008228 for (j = 1;j <= nbNs;j++)
8229 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8230 break;
8231 if (j <= nbNs)
8232 xmlErrAttributeDup(ctxt, NULL, attname);
8233 else
8234 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008235 if (alloc != 0) xmlFree(attvalue);
8236 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008237 continue;
8238 }
8239 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008240 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8241 xmlURIPtr uri;
8242
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008243 if (attname == ctxt->str_xml) {
8244 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008245 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8246 "xml namespace prefix mapped to wrong URI\n",
8247 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008248 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008249 /*
8250 * Do not keep a namespace definition node
8251 */
8252 if (alloc != 0) xmlFree(attvalue);
8253 SKIP_BLANKS;
8254 continue;
8255 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008256 uri = xmlParseURI((const char *) URL);
8257 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008258 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8259 "xmlns:%s: '%s' is not a valid URI\n",
8260 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008261 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008262 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008263 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8264 "xmlns:%s: URI %s is not absolute\n",
8265 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008266 }
8267 xmlFreeURI(uri);
8268 }
8269
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008271 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008272 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008273 for (j = 1;j <= nbNs;j++)
8274 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8275 break;
8276 if (j <= nbNs)
8277 xmlErrAttributeDup(ctxt, aprefix, attname);
8278 else
8279 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008280 if (alloc != 0) xmlFree(attvalue);
8281 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008282 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 continue;
8284 }
8285
8286 /*
8287 * Add the pair to atts
8288 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008289 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8290 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008291 if (attvalue[len] == 0)
8292 xmlFree(attvalue);
8293 goto failed;
8294 }
8295 maxatts = ctxt->maxatts;
8296 atts = ctxt->atts;
8297 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008298 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008299 atts[nbatts++] = attname;
8300 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008301 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008302 atts[nbatts++] = attvalue;
8303 attvalue += len;
8304 atts[nbatts++] = attvalue;
8305 /*
8306 * tag if some deallocation is needed
8307 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008308 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008309 } else {
8310 if ((attvalue != NULL) && (attvalue[len] == 0))
8311 xmlFree(attvalue);
8312 }
8313
8314failed:
8315
8316 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008317 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008318 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8319 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008320 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008321 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8322 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008323 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008324 }
8325 SKIP_BLANKS;
8326 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8327 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008328 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008329 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008330 break;
8331 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008332 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008333 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008334 }
8335
Daniel Veillard0fb18932003-09-07 09:14:37 +00008336 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008337 * The attributes defaulting
8338 */
8339 if (ctxt->attsDefault != NULL) {
8340 xmlDefAttrsPtr defaults;
8341
8342 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8343 if (defaults != NULL) {
8344 for (i = 0;i < defaults->nbAttrs;i++) {
8345 attname = defaults->values[4 * i];
8346 aprefix = defaults->values[4 * i + 1];
8347
8348 /*
8349 * special work for namespaces defaulted defs
8350 */
8351 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8352 /*
8353 * check that it's not a defined namespace
8354 */
8355 for (j = 1;j <= nbNs;j++)
8356 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8357 break;
8358 if (j <= nbNs) continue;
8359
8360 nsname = xmlGetNamespace(ctxt, NULL);
8361 if (nsname != defaults->values[4 * i + 2]) {
8362 if (nsPush(ctxt, NULL,
8363 defaults->values[4 * i + 2]) > 0)
8364 nbNs++;
8365 }
8366 } else if (aprefix == ctxt->str_xmlns) {
8367 /*
8368 * check that it's not a defined namespace
8369 */
8370 for (j = 1;j <= nbNs;j++)
8371 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8372 break;
8373 if (j <= nbNs) continue;
8374
8375 nsname = xmlGetNamespace(ctxt, attname);
8376 if (nsname != defaults->values[2]) {
8377 if (nsPush(ctxt, attname,
8378 defaults->values[4 * i + 2]) > 0)
8379 nbNs++;
8380 }
8381 } else {
8382 /*
8383 * check that it's not a defined attribute
8384 */
8385 for (j = 0;j < nbatts;j+=5) {
8386 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8387 break;
8388 }
8389 if (j < nbatts) continue;
8390
8391 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8392 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008393 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008394 }
8395 maxatts = ctxt->maxatts;
8396 atts = ctxt->atts;
8397 }
8398 atts[nbatts++] = attname;
8399 atts[nbatts++] = aprefix;
8400 if (aprefix == NULL)
8401 atts[nbatts++] = NULL;
8402 else
8403 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8404 atts[nbatts++] = defaults->values[4 * i + 2];
8405 atts[nbatts++] = defaults->values[4 * i + 3];
8406 nbdef++;
8407 }
8408 }
8409 }
8410 }
8411
Daniel Veillarde70c8772003-11-25 07:21:18 +00008412 /*
8413 * The attributes checkings
8414 */
8415 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008416 /*
8417 * The default namespace does not apply to attribute names.
8418 */
8419 if (atts[i + 1] != NULL) {
8420 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8421 if (nsname == NULL) {
8422 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8423 "Namespace prefix %s for %s on %s is not defined\n",
8424 atts[i + 1], atts[i], localname);
8425 }
8426 atts[i + 2] = nsname;
8427 } else
8428 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008429 /*
8430 * [ WFC: Unique Att Spec ]
8431 * No attribute name may appear more than once in the same
8432 * start-tag or empty-element tag.
8433 * As extended by the Namespace in XML REC.
8434 */
8435 for (j = 0; j < i;j += 5) {
8436 if (atts[i] == atts[j]) {
8437 if (atts[i+1] == atts[j+1]) {
8438 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8439 break;
8440 }
8441 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8442 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8443 "Namespaced Attribute %s in '%s' redefined\n",
8444 atts[i], nsname, NULL);
8445 break;
8446 }
8447 }
8448 }
8449 }
8450
Daniel Veillarde57ec792003-09-10 10:50:59 +00008451 nsname = xmlGetNamespace(ctxt, prefix);
8452 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008453 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8454 "Namespace prefix %s on %s is not defined\n",
8455 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008456 }
8457 *pref = prefix;
8458 *URI = nsname;
8459
8460 /*
8461 * SAX: Start of Element !
8462 */
8463 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8464 (!ctxt->disableSAX)) {
8465 if (nbNs > 0)
8466 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8467 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8468 nbatts / 5, nbdef, atts);
8469 else
8470 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8471 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8472 }
8473
8474 /*
8475 * Free up attribute allocated strings if needed
8476 */
8477 if (attval != 0) {
8478 for (i = 3,j = 0; j < nratts;i += 5,j++)
8479 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8480 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008481 }
8482
8483 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008484
8485base_changed:
8486 /*
8487 * the attribute strings are valid iif the base didn't changed
8488 */
8489 if (attval != 0) {
8490 for (i = 3,j = 0; j < nratts;i += 5,j++)
8491 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8492 xmlFree((xmlChar *) atts[i]);
8493 }
8494 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008495 ctxt->input->line = oldline;
8496 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008497 if (ctxt->wellFormed == 1) {
8498 goto reparse;
8499 }
8500 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008501}
8502
8503/**
8504 * xmlParseEndTag2:
8505 * @ctxt: an XML parser context
8506 * @line: line of the start tag
8507 * @nsNr: number of namespaces on the start tag
8508 *
8509 * parse an end of tag
8510 *
8511 * [42] ETag ::= '</' Name S? '>'
8512 *
8513 * With namespace
8514 *
8515 * [NS 9] ETag ::= '</' QName S? '>'
8516 */
8517
8518static void
8519xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008520 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008521 const xmlChar *name;
8522
8523 GROW;
8524 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008525 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008526 return;
8527 }
8528 SKIP(2);
8529
William M. Brack13dfa872004-09-18 04:52:08 +00008530 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008531 if (ctxt->input->cur[tlen] == '>') {
8532 ctxt->input->cur += tlen + 1;
8533 goto done;
8534 }
8535 ctxt->input->cur += tlen;
8536 name = (xmlChar*)1;
8537 } else {
8538 if (prefix == NULL)
8539 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8540 else
8541 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8542 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543
8544 /*
8545 * We should definitely be at the ending "S? '>'" part
8546 */
8547 GROW;
8548 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008549 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008550 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008551 } else
8552 NEXT1;
8553
8554 /*
8555 * [ WFC: Element Type Match ]
8556 * The Name in an element's end-tag must match the element type in the
8557 * start-tag.
8558 *
8559 */
8560 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008561 if (name == NULL) name = BAD_CAST "unparseable";
8562 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008563 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008564 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008565 }
8566
8567 /*
8568 * SAX: End of Tag
8569 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008570done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008571 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8572 (!ctxt->disableSAX))
8573 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8574
Daniel Veillard0fb18932003-09-07 09:14:37 +00008575 spacePop(ctxt);
8576 if (nsNr != 0)
8577 nsPop(ctxt, nsNr);
8578 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008579}
8580
8581/**
Owen Taylor3473f882001-02-23 17:55:21 +00008582 * xmlParseCDSect:
8583 * @ctxt: an XML parser context
8584 *
8585 * Parse escaped pure raw content.
8586 *
8587 * [18] CDSect ::= CDStart CData CDEnd
8588 *
8589 * [19] CDStart ::= '<![CDATA['
8590 *
8591 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8592 *
8593 * [21] CDEnd ::= ']]>'
8594 */
8595void
8596xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8597 xmlChar *buf = NULL;
8598 int len = 0;
8599 int size = XML_PARSER_BUFFER_SIZE;
8600 int r, rl;
8601 int s, sl;
8602 int cur, l;
8603 int count = 0;
8604
Daniel Veillard8f597c32003-10-06 08:19:27 +00008605 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008606 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008607 SKIP(9);
8608 } else
8609 return;
8610
8611 ctxt->instate = XML_PARSER_CDATA_SECTION;
8612 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008613 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008614 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008615 ctxt->instate = XML_PARSER_CONTENT;
8616 return;
8617 }
8618 NEXTL(rl);
8619 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008620 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008621 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008622 ctxt->instate = XML_PARSER_CONTENT;
8623 return;
8624 }
8625 NEXTL(sl);
8626 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008627 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008628 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008629 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008630 return;
8631 }
William M. Brack871611b2003-10-18 04:53:14 +00008632 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008633 ((r != ']') || (s != ']') || (cur != '>'))) {
8634 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008635 xmlChar *tmp;
8636
Owen Taylor3473f882001-02-23 17:55:21 +00008637 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008638 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8639 if (tmp == NULL) {
8640 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008641 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008642 return;
8643 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008644 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008645 }
8646 COPY_BUF(rl,buf,len,r);
8647 r = s;
8648 rl = sl;
8649 s = cur;
8650 sl = l;
8651 count++;
8652 if (count > 50) {
8653 GROW;
8654 count = 0;
8655 }
8656 NEXTL(l);
8657 cur = CUR_CHAR(l);
8658 }
8659 buf[len] = 0;
8660 ctxt->instate = XML_PARSER_CONTENT;
8661 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008662 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008663 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008664 xmlFree(buf);
8665 return;
8666 }
8667 NEXTL(l);
8668
8669 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008670 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008671 */
8672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8673 if (ctxt->sax->cdataBlock != NULL)
8674 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008675 else if (ctxt->sax->characters != NULL)
8676 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008677 }
8678 xmlFree(buf);
8679}
8680
8681/**
8682 * xmlParseContent:
8683 * @ctxt: an XML parser context
8684 *
8685 * Parse a content:
8686 *
8687 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8688 */
8689
8690void
8691xmlParseContent(xmlParserCtxtPtr ctxt) {
8692 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008693 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008694 ((RAW != '<') || (NXT(1) != '/')) &&
8695 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008696 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008697 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008698 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008699
8700 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008701 * First case : a Processing Instruction.
8702 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008703 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008704 xmlParsePI(ctxt);
8705 }
8706
8707 /*
8708 * Second case : a CDSection
8709 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008710 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008711 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008712 xmlParseCDSect(ctxt);
8713 }
8714
8715 /*
8716 * Third case : a comment
8717 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008718 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008719 (NXT(2) == '-') && (NXT(3) == '-')) {
8720 xmlParseComment(ctxt);
8721 ctxt->instate = XML_PARSER_CONTENT;
8722 }
8723
8724 /*
8725 * Fourth case : a sub-element.
8726 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008727 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008728 xmlParseElement(ctxt);
8729 }
8730
8731 /*
8732 * Fifth case : a reference. If if has not been resolved,
8733 * parsing returns it's Name, create the node
8734 */
8735
Daniel Veillard21a0f912001-02-25 19:54:14 +00008736 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008737 xmlParseReference(ctxt);
8738 }
8739
8740 /*
8741 * Last case, text. Note that References are handled directly.
8742 */
8743 else {
8744 xmlParseCharData(ctxt, 0);
8745 }
8746
8747 GROW;
8748 /*
8749 * Pop-up of finished entities.
8750 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008751 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008752 xmlPopInput(ctxt);
8753 SHRINK;
8754
Daniel Veillardfdc91562002-07-01 21:52:03 +00008755 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008756 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8757 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008758 ctxt->instate = XML_PARSER_EOF;
8759 break;
8760 }
8761 }
8762}
8763
8764/**
8765 * xmlParseElement:
8766 * @ctxt: an XML parser context
8767 *
8768 * parse an XML element, this is highly recursive
8769 *
8770 * [39] element ::= EmptyElemTag | STag content ETag
8771 *
8772 * [ WFC: Element Type Match ]
8773 * The Name in an element's end-tag must match the element type in the
8774 * start-tag.
8775 *
Owen Taylor3473f882001-02-23 17:55:21 +00008776 */
8777
8778void
8779xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008780 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008781 const xmlChar *prefix;
8782 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008783 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008784 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008785 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008786 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008787
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008788 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8789 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8790 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8791 xmlParserMaxDepth);
8792 ctxt->instate = XML_PARSER_EOF;
8793 return;
8794 }
8795
Owen Taylor3473f882001-02-23 17:55:21 +00008796 /* Capture start position */
8797 if (ctxt->record_info) {
8798 node_info.begin_pos = ctxt->input->consumed +
8799 (CUR_PTR - ctxt->input->base);
8800 node_info.begin_line = ctxt->input->line;
8801 }
8802
8803 if (ctxt->spaceNr == 0)
8804 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008805 else if (*ctxt->space == -2)
8806 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008807 else
8808 spacePush(ctxt, *ctxt->space);
8809
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008810 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008811#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008813#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008814 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008815#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816 else
8817 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008818#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008819 if (name == NULL) {
8820 spacePop(ctxt);
8821 return;
8822 }
8823 namePush(ctxt, name);
8824 ret = ctxt->node;
8825
Daniel Veillard4432df22003-09-28 18:58:27 +00008826#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008827 /*
8828 * [ VC: Root Element Type ]
8829 * The Name in the document type declaration must match the element
8830 * type of the root element.
8831 */
8832 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8833 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8834 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008835#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008836
8837 /*
8838 * Check for an Empty Element.
8839 */
8840 if ((RAW == '/') && (NXT(1) == '>')) {
8841 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008842 if (ctxt->sax2) {
8843 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8844 (!ctxt->disableSAX))
8845 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008846#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008847 } else {
8848 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8849 (!ctxt->disableSAX))
8850 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008851#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008853 namePop(ctxt);
8854 spacePop(ctxt);
8855 if (nsNr != ctxt->nsNr)
8856 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008857 if ( ret != NULL && ctxt->record_info ) {
8858 node_info.end_pos = ctxt->input->consumed +
8859 (CUR_PTR - ctxt->input->base);
8860 node_info.end_line = ctxt->input->line;
8861 node_info.node = ret;
8862 xmlParserAddNodeInfo(ctxt, &node_info);
8863 }
8864 return;
8865 }
8866 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008867 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008868 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008869 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8870 "Couldn't find end of Start Tag %s line %d\n",
8871 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008872
8873 /*
8874 * end of parsing of this node.
8875 */
8876 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008878 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008879 if (nsNr != ctxt->nsNr)
8880 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008881
8882 /*
8883 * Capture end position and add node
8884 */
8885 if ( ret != NULL && ctxt->record_info ) {
8886 node_info.end_pos = ctxt->input->consumed +
8887 (CUR_PTR - ctxt->input->base);
8888 node_info.end_line = ctxt->input->line;
8889 node_info.node = ret;
8890 xmlParserAddNodeInfo(ctxt, &node_info);
8891 }
8892 return;
8893 }
8894
8895 /*
8896 * Parse the content of the element:
8897 */
8898 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008899 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008900 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008901 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008902 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008903
8904 /*
8905 * end of parsing of this node.
8906 */
8907 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008909 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910 if (nsNr != ctxt->nsNr)
8911 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008912 return;
8913 }
8914
8915 /*
8916 * parse the end of tag: '</' should be here.
8917 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008918 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008919 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008920 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008921 }
8922#ifdef LIBXML_SAX1_ENABLED
8923 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008924 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008925#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008926
8927 /*
8928 * Capture end position and add node
8929 */
8930 if ( ret != NULL && ctxt->record_info ) {
8931 node_info.end_pos = ctxt->input->consumed +
8932 (CUR_PTR - ctxt->input->base);
8933 node_info.end_line = ctxt->input->line;
8934 node_info.node = ret;
8935 xmlParserAddNodeInfo(ctxt, &node_info);
8936 }
8937}
8938
8939/**
8940 * xmlParseVersionNum:
8941 * @ctxt: an XML parser context
8942 *
8943 * parse the XML version value.
8944 *
8945 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8946 *
8947 * Returns the string giving the XML version number, or NULL
8948 */
8949xmlChar *
8950xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8951 xmlChar *buf = NULL;
8952 int len = 0;
8953 int size = 10;
8954 xmlChar cur;
8955
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008956 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008957 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008958 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008959 return(NULL);
8960 }
8961 cur = CUR;
8962 while (((cur >= 'a') && (cur <= 'z')) ||
8963 ((cur >= 'A') && (cur <= 'Z')) ||
8964 ((cur >= '0') && (cur <= '9')) ||
8965 (cur == '_') || (cur == '.') ||
8966 (cur == ':') || (cur == '-')) {
8967 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008968 xmlChar *tmp;
8969
Owen Taylor3473f882001-02-23 17:55:21 +00008970 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008971 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8972 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008973 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008974 return(NULL);
8975 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008976 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008977 }
8978 buf[len++] = cur;
8979 NEXT;
8980 cur=CUR;
8981 }
8982 buf[len] = 0;
8983 return(buf);
8984}
8985
8986/**
8987 * xmlParseVersionInfo:
8988 * @ctxt: an XML parser context
8989 *
8990 * parse the XML version.
8991 *
8992 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8993 *
8994 * [25] Eq ::= S? '=' S?
8995 *
8996 * Returns the version string, e.g. "1.0"
8997 */
8998
8999xmlChar *
9000xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9001 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009002
Daniel Veillarda07050d2003-10-19 14:46:32 +00009003 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009004 SKIP(7);
9005 SKIP_BLANKS;
9006 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009007 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009008 return(NULL);
9009 }
9010 NEXT;
9011 SKIP_BLANKS;
9012 if (RAW == '"') {
9013 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009014 version = xmlParseVersionNum(ctxt);
9015 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009016 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009017 } else
9018 NEXT;
9019 } else if (RAW == '\''){
9020 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009021 version = xmlParseVersionNum(ctxt);
9022 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009023 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009024 } else
9025 NEXT;
9026 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009027 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009028 }
9029 }
9030 return(version);
9031}
9032
9033/**
9034 * xmlParseEncName:
9035 * @ctxt: an XML parser context
9036 *
9037 * parse the XML encoding name
9038 *
9039 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9040 *
9041 * Returns the encoding name value or NULL
9042 */
9043xmlChar *
9044xmlParseEncName(xmlParserCtxtPtr ctxt) {
9045 xmlChar *buf = NULL;
9046 int len = 0;
9047 int size = 10;
9048 xmlChar cur;
9049
9050 cur = CUR;
9051 if (((cur >= 'a') && (cur <= 'z')) ||
9052 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009053 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009054 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009055 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009056 return(NULL);
9057 }
9058
9059 buf[len++] = cur;
9060 NEXT;
9061 cur = CUR;
9062 while (((cur >= 'a') && (cur <= 'z')) ||
9063 ((cur >= 'A') && (cur <= 'Z')) ||
9064 ((cur >= '0') && (cur <= '9')) ||
9065 (cur == '.') || (cur == '_') ||
9066 (cur == '-')) {
9067 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009068 xmlChar *tmp;
9069
Owen Taylor3473f882001-02-23 17:55:21 +00009070 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009071 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9072 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009073 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009074 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009075 return(NULL);
9076 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009077 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009078 }
9079 buf[len++] = cur;
9080 NEXT;
9081 cur = CUR;
9082 if (cur == 0) {
9083 SHRINK;
9084 GROW;
9085 cur = CUR;
9086 }
9087 }
9088 buf[len] = 0;
9089 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009090 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009091 }
9092 return(buf);
9093}
9094
9095/**
9096 * xmlParseEncodingDecl:
9097 * @ctxt: an XML parser context
9098 *
9099 * parse the XML encoding declaration
9100 *
9101 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9102 *
9103 * this setups the conversion filters.
9104 *
9105 * Returns the encoding value or NULL
9106 */
9107
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009108const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009109xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9110 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009111
9112 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009113 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009114 SKIP(8);
9115 SKIP_BLANKS;
9116 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009117 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009118 return(NULL);
9119 }
9120 NEXT;
9121 SKIP_BLANKS;
9122 if (RAW == '"') {
9123 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009124 encoding = xmlParseEncName(ctxt);
9125 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009126 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009127 } else
9128 NEXT;
9129 } else if (RAW == '\''){
9130 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009131 encoding = xmlParseEncName(ctxt);
9132 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009133 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009134 } else
9135 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009136 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009137 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009138 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009139 /*
9140 * UTF-16 encoding stwich has already taken place at this stage,
9141 * more over the little-endian/big-endian selection is already done
9142 */
9143 if ((encoding != NULL) &&
9144 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9145 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009146 if (ctxt->encoding != NULL)
9147 xmlFree((xmlChar *) ctxt->encoding);
9148 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009149 }
9150 /*
9151 * UTF-8 encoding is handled natively
9152 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009153 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009154 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9155 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009156 if (ctxt->encoding != NULL)
9157 xmlFree((xmlChar *) ctxt->encoding);
9158 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009159 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009160 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009161 xmlCharEncodingHandlerPtr handler;
9162
9163 if (ctxt->input->encoding != NULL)
9164 xmlFree((xmlChar *) ctxt->input->encoding);
9165 ctxt->input->encoding = encoding;
9166
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009167 handler = xmlFindCharEncodingHandler((const char *) encoding);
9168 if (handler != NULL) {
9169 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009170 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009171 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009172 "Unsupported encoding %s\n", encoding);
9173 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009174 }
9175 }
9176 }
9177 return(encoding);
9178}
9179
9180/**
9181 * xmlParseSDDecl:
9182 * @ctxt: an XML parser context
9183 *
9184 * parse the XML standalone declaration
9185 *
9186 * [32] SDDecl ::= S 'standalone' Eq
9187 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9188 *
9189 * [ VC: Standalone Document Declaration ]
9190 * TODO The standalone document declaration must have the value "no"
9191 * if any external markup declarations contain declarations of:
9192 * - attributes with default values, if elements to which these
9193 * attributes apply appear in the document without specifications
9194 * of values for these attributes, or
9195 * - entities (other than amp, lt, gt, apos, quot), if references
9196 * to those entities appear in the document, or
9197 * - attributes with values subject to normalization, where the
9198 * attribute appears in the document with a value which will change
9199 * as a result of normalization, or
9200 * - element types with element content, if white space occurs directly
9201 * within any instance of those types.
9202 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009203 * Returns:
9204 * 1 if standalone="yes"
9205 * 0 if standalone="no"
9206 * -2 if standalone attribute is missing or invalid
9207 * (A standalone value of -2 means that the XML declaration was found,
9208 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009209 */
9210
9211int
9212xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009213 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009214
9215 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009216 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009217 SKIP(10);
9218 SKIP_BLANKS;
9219 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009220 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009221 return(standalone);
9222 }
9223 NEXT;
9224 SKIP_BLANKS;
9225 if (RAW == '\''){
9226 NEXT;
9227 if ((RAW == 'n') && (NXT(1) == 'o')) {
9228 standalone = 0;
9229 SKIP(2);
9230 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9231 (NXT(2) == 's')) {
9232 standalone = 1;
9233 SKIP(3);
9234 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009235 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009236 }
9237 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009238 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009239 } else
9240 NEXT;
9241 } else if (RAW == '"'){
9242 NEXT;
9243 if ((RAW == 'n') && (NXT(1) == 'o')) {
9244 standalone = 0;
9245 SKIP(2);
9246 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9247 (NXT(2) == 's')) {
9248 standalone = 1;
9249 SKIP(3);
9250 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009251 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009252 }
9253 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009254 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009255 } else
9256 NEXT;
9257 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009258 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009259 }
9260 }
9261 return(standalone);
9262}
9263
9264/**
9265 * xmlParseXMLDecl:
9266 * @ctxt: an XML parser context
9267 *
9268 * parse an XML declaration header
9269 *
9270 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9271 */
9272
9273void
9274xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9275 xmlChar *version;
9276
9277 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009278 * This value for standalone indicates that the document has an
9279 * XML declaration but it does not have a standalone attribute.
9280 * It will be overwritten later if a standalone attribute is found.
9281 */
9282 ctxt->input->standalone = -2;
9283
9284 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009285 * We know that '<?xml' is here.
9286 */
9287 SKIP(5);
9288
William M. Brack76e95df2003-10-18 16:20:14 +00009289 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009290 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9291 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009292 }
9293 SKIP_BLANKS;
9294
9295 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009296 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009297 */
9298 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009299 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009300 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009301 } else {
9302 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9303 /*
9304 * TODO: Blueberry should be detected here
9305 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009306 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9307 "Unsupported version '%s'\n",
9308 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009309 }
9310 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009311 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009312 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009313 }
Owen Taylor3473f882001-02-23 17:55:21 +00009314
9315 /*
9316 * We may have the encoding declaration
9317 */
William M. Brack76e95df2003-10-18 16:20:14 +00009318 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009319 if ((RAW == '?') && (NXT(1) == '>')) {
9320 SKIP(2);
9321 return;
9322 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009324 }
9325 xmlParseEncodingDecl(ctxt);
9326 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9327 /*
9328 * The XML REC instructs us to stop parsing right here
9329 */
9330 return;
9331 }
9332
9333 /*
9334 * We may have the standalone status.
9335 */
William M. Brack76e95df2003-10-18 16:20:14 +00009336 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009337 if ((RAW == '?') && (NXT(1) == '>')) {
9338 SKIP(2);
9339 return;
9340 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009342 }
9343 SKIP_BLANKS;
9344 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9345
9346 SKIP_BLANKS;
9347 if ((RAW == '?') && (NXT(1) == '>')) {
9348 SKIP(2);
9349 } else if (RAW == '>') {
9350 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009351 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009352 NEXT;
9353 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009354 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009355 MOVETO_ENDTAG(CUR_PTR);
9356 NEXT;
9357 }
9358}
9359
9360/**
9361 * xmlParseMisc:
9362 * @ctxt: an XML parser context
9363 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009364 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009365 *
9366 * [27] Misc ::= Comment | PI | S
9367 */
9368
9369void
9370xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009371 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009372 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009373 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009374 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009375 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009376 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009377 NEXT;
9378 } else
9379 xmlParseComment(ctxt);
9380 }
9381}
9382
9383/**
9384 * xmlParseDocument:
9385 * @ctxt: an XML parser context
9386 *
9387 * parse an XML document (and build a tree if using the standard SAX
9388 * interface).
9389 *
9390 * [1] document ::= prolog element Misc*
9391 *
9392 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9393 *
9394 * Returns 0, -1 in case of error. the parser context is augmented
9395 * as a result of the parsing.
9396 */
9397
9398int
9399xmlParseDocument(xmlParserCtxtPtr ctxt) {
9400 xmlChar start[4];
9401 xmlCharEncoding enc;
9402
9403 xmlInitParser();
9404
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009405 if ((ctxt == NULL) || (ctxt->input == NULL))
9406 return(-1);
9407
Owen Taylor3473f882001-02-23 17:55:21 +00009408 GROW;
9409
9410 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009411 * SAX: detecting the level.
9412 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009413 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009414
9415 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009416 * SAX: beginning of the document processing.
9417 */
9418 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9419 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9420
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009421 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9422 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009423 /*
9424 * Get the 4 first bytes and decode the charset
9425 * if enc != XML_CHAR_ENCODING_NONE
9426 * plug some encoding conversion routines.
9427 */
9428 start[0] = RAW;
9429 start[1] = NXT(1);
9430 start[2] = NXT(2);
9431 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009432 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009433 if (enc != XML_CHAR_ENCODING_NONE) {
9434 xmlSwitchEncoding(ctxt, enc);
9435 }
Owen Taylor3473f882001-02-23 17:55:21 +00009436 }
9437
9438
9439 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009440 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009441 }
9442
9443 /*
9444 * Check for the XMLDecl in the Prolog.
9445 */
9446 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009447 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009448
9449 /*
9450 * Note that we will switch encoding on the fly.
9451 */
9452 xmlParseXMLDecl(ctxt);
9453 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9454 /*
9455 * The XML REC instructs us to stop parsing right here
9456 */
9457 return(-1);
9458 }
9459 ctxt->standalone = ctxt->input->standalone;
9460 SKIP_BLANKS;
9461 } else {
9462 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9463 }
9464 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9465 ctxt->sax->startDocument(ctxt->userData);
9466
9467 /*
9468 * The Misc part of the Prolog
9469 */
9470 GROW;
9471 xmlParseMisc(ctxt);
9472
9473 /*
9474 * Then possibly doc type declaration(s) and more Misc
9475 * (doctypedecl Misc*)?
9476 */
9477 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009478 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009479
9480 ctxt->inSubset = 1;
9481 xmlParseDocTypeDecl(ctxt);
9482 if (RAW == '[') {
9483 ctxt->instate = XML_PARSER_DTD;
9484 xmlParseInternalSubset(ctxt);
9485 }
9486
9487 /*
9488 * Create and update the external subset.
9489 */
9490 ctxt->inSubset = 2;
9491 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9492 (!ctxt->disableSAX))
9493 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9494 ctxt->extSubSystem, ctxt->extSubURI);
9495 ctxt->inSubset = 0;
9496
Daniel Veillardac4118d2008-01-11 05:27:32 +00009497 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009498
9499 ctxt->instate = XML_PARSER_PROLOG;
9500 xmlParseMisc(ctxt);
9501 }
9502
9503 /*
9504 * Time to start parsing the tree itself
9505 */
9506 GROW;
9507 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009508 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9509 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009510 } else {
9511 ctxt->instate = XML_PARSER_CONTENT;
9512 xmlParseElement(ctxt);
9513 ctxt->instate = XML_PARSER_EPILOG;
9514
9515
9516 /*
9517 * The Misc part at the end
9518 */
9519 xmlParseMisc(ctxt);
9520
Daniel Veillard561b7f82002-03-20 21:55:57 +00009521 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009522 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009523 }
9524 ctxt->instate = XML_PARSER_EOF;
9525 }
9526
9527 /*
9528 * SAX: end of the document processing.
9529 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009530 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009531 ctxt->sax->endDocument(ctxt->userData);
9532
Daniel Veillard5997aca2002-03-18 18:36:20 +00009533 /*
9534 * Remove locally kept entity definitions if the tree was not built
9535 */
9536 if ((ctxt->myDoc != NULL) &&
9537 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9538 xmlFreeDoc(ctxt->myDoc);
9539 ctxt->myDoc = NULL;
9540 }
9541
Daniel Veillardc7612992002-02-17 22:47:37 +00009542 if (! ctxt->wellFormed) {
9543 ctxt->valid = 0;
9544 return(-1);
9545 }
Owen Taylor3473f882001-02-23 17:55:21 +00009546 return(0);
9547}
9548
9549/**
9550 * xmlParseExtParsedEnt:
9551 * @ctxt: an XML parser context
9552 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009553 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009554 * An external general parsed entity is well-formed if it matches the
9555 * production labeled extParsedEnt.
9556 *
9557 * [78] extParsedEnt ::= TextDecl? content
9558 *
9559 * Returns 0, -1 in case of error. the parser context is augmented
9560 * as a result of the parsing.
9561 */
9562
9563int
9564xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9565 xmlChar start[4];
9566 xmlCharEncoding enc;
9567
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009568 if ((ctxt == NULL) || (ctxt->input == NULL))
9569 return(-1);
9570
Owen Taylor3473f882001-02-23 17:55:21 +00009571 xmlDefaultSAXHandlerInit();
9572
Daniel Veillard309f81d2003-09-23 09:02:53 +00009573 xmlDetectSAX2(ctxt);
9574
Owen Taylor3473f882001-02-23 17:55:21 +00009575 GROW;
9576
9577 /*
9578 * SAX: beginning of the document processing.
9579 */
9580 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9581 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9582
9583 /*
9584 * Get the 4 first bytes and decode the charset
9585 * if enc != XML_CHAR_ENCODING_NONE
9586 * plug some encoding conversion routines.
9587 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009588 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9589 start[0] = RAW;
9590 start[1] = NXT(1);
9591 start[2] = NXT(2);
9592 start[3] = NXT(3);
9593 enc = xmlDetectCharEncoding(start, 4);
9594 if (enc != XML_CHAR_ENCODING_NONE) {
9595 xmlSwitchEncoding(ctxt, enc);
9596 }
Owen Taylor3473f882001-02-23 17:55:21 +00009597 }
9598
9599
9600 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009601 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009602 }
9603
9604 /*
9605 * Check for the XMLDecl in the Prolog.
9606 */
9607 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009608 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009609
9610 /*
9611 * Note that we will switch encoding on the fly.
9612 */
9613 xmlParseXMLDecl(ctxt);
9614 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9615 /*
9616 * The XML REC instructs us to stop parsing right here
9617 */
9618 return(-1);
9619 }
9620 SKIP_BLANKS;
9621 } else {
9622 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9623 }
9624 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9625 ctxt->sax->startDocument(ctxt->userData);
9626
9627 /*
9628 * Doing validity checking on chunk doesn't make sense
9629 */
9630 ctxt->instate = XML_PARSER_CONTENT;
9631 ctxt->validate = 0;
9632 ctxt->loadsubset = 0;
9633 ctxt->depth = 0;
9634
9635 xmlParseContent(ctxt);
9636
9637 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009638 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009639 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009640 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009641 }
9642
9643 /*
9644 * SAX: end of the document processing.
9645 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009646 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009647 ctxt->sax->endDocument(ctxt->userData);
9648
9649 if (! ctxt->wellFormed) return(-1);
9650 return(0);
9651}
9652
Daniel Veillard73b013f2003-09-30 12:36:01 +00009653#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009654/************************************************************************
9655 * *
9656 * Progressive parsing interfaces *
9657 * *
9658 ************************************************************************/
9659
9660/**
9661 * xmlParseLookupSequence:
9662 * @ctxt: an XML parser context
9663 * @first: the first char to lookup
9664 * @next: the next char to lookup or zero
9665 * @third: the next char to lookup or zero
9666 *
9667 * Try to find if a sequence (first, next, third) or just (first next) or
9668 * (first) is available in the input stream.
9669 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9670 * to avoid rescanning sequences of bytes, it DOES change the state of the
9671 * parser, do not use liberally.
9672 *
9673 * Returns the index to the current parsing point if the full sequence
9674 * is available, -1 otherwise.
9675 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009676static int
Owen Taylor3473f882001-02-23 17:55:21 +00009677xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9678 xmlChar next, xmlChar third) {
9679 int base, len;
9680 xmlParserInputPtr in;
9681 const xmlChar *buf;
9682
9683 in = ctxt->input;
9684 if (in == NULL) return(-1);
9685 base = in->cur - in->base;
9686 if (base < 0) return(-1);
9687 if (ctxt->checkIndex > base)
9688 base = ctxt->checkIndex;
9689 if (in->buf == NULL) {
9690 buf = in->base;
9691 len = in->length;
9692 } else {
9693 buf = in->buf->buffer->content;
9694 len = in->buf->buffer->use;
9695 }
9696 /* take into account the sequence length */
9697 if (third) len -= 2;
9698 else if (next) len --;
9699 for (;base < len;base++) {
9700 if (buf[base] == first) {
9701 if (third != 0) {
9702 if ((buf[base + 1] != next) ||
9703 (buf[base + 2] != third)) continue;
9704 } else if (next != 0) {
9705 if (buf[base + 1] != next) continue;
9706 }
9707 ctxt->checkIndex = 0;
9708#ifdef DEBUG_PUSH
9709 if (next == 0)
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: lookup '%c' found at %d\n",
9712 first, base);
9713 else if (third == 0)
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: lookup '%c%c' found at %d\n",
9716 first, next, base);
9717 else
9718 xmlGenericError(xmlGenericErrorContext,
9719 "PP: lookup '%c%c%c' found at %d\n",
9720 first, next, third, base);
9721#endif
9722 return(base - (in->cur - in->base));
9723 }
9724 }
9725 ctxt->checkIndex = base;
9726#ifdef DEBUG_PUSH
9727 if (next == 0)
9728 xmlGenericError(xmlGenericErrorContext,
9729 "PP: lookup '%c' failed\n", first);
9730 else if (third == 0)
9731 xmlGenericError(xmlGenericErrorContext,
9732 "PP: lookup '%c%c' failed\n", first, next);
9733 else
9734 xmlGenericError(xmlGenericErrorContext,
9735 "PP: lookup '%c%c%c' failed\n", first, next, third);
9736#endif
9737 return(-1);
9738}
9739
9740/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009741 * xmlParseGetLasts:
9742 * @ctxt: an XML parser context
9743 * @lastlt: pointer to store the last '<' from the input
9744 * @lastgt: pointer to store the last '>' from the input
9745 *
9746 * Lookup the last < and > in the current chunk
9747 */
9748static void
9749xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9750 const xmlChar **lastgt) {
9751 const xmlChar *tmp;
9752
9753 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9754 xmlGenericError(xmlGenericErrorContext,
9755 "Internal error: xmlParseGetLasts\n");
9756 return;
9757 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009758 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009759 tmp = ctxt->input->end;
9760 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009761 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009762 if (tmp < ctxt->input->base) {
9763 *lastlt = NULL;
9764 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009765 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009766 *lastlt = tmp;
9767 tmp++;
9768 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9769 if (*tmp == '\'') {
9770 tmp++;
9771 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9772 if (tmp < ctxt->input->end) tmp++;
9773 } else if (*tmp == '"') {
9774 tmp++;
9775 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9776 if (tmp < ctxt->input->end) tmp++;
9777 } else
9778 tmp++;
9779 }
9780 if (tmp < ctxt->input->end)
9781 *lastgt = tmp;
9782 else {
9783 tmp = *lastlt;
9784 tmp--;
9785 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9786 if (tmp >= ctxt->input->base)
9787 *lastgt = tmp;
9788 else
9789 *lastgt = NULL;
9790 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009791 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009792 } else {
9793 *lastlt = NULL;
9794 *lastgt = NULL;
9795 }
9796}
9797/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009798 * xmlCheckCdataPush:
9799 * @cur: pointer to the bock of characters
9800 * @len: length of the block in bytes
9801 *
9802 * Check that the block of characters is okay as SCdata content [20]
9803 *
9804 * Returns the number of bytes to pass if okay, a negative index where an
9805 * UTF-8 error occured otherwise
9806 */
9807static int
9808xmlCheckCdataPush(const xmlChar *utf, int len) {
9809 int ix;
9810 unsigned char c;
9811 int codepoint;
9812
9813 if ((utf == NULL) || (len <= 0))
9814 return(0);
9815
9816 for (ix = 0; ix < len;) { /* string is 0-terminated */
9817 c = utf[ix];
9818 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9819 if (c >= 0x20)
9820 ix++;
9821 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9822 ix++;
9823 else
9824 return(-ix);
9825 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9826 if (ix + 2 > len) return(ix);
9827 if ((utf[ix+1] & 0xc0 ) != 0x80)
9828 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009829 codepoint = (utf[ix] & 0x1f) << 6;
9830 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009831 if (!xmlIsCharQ(codepoint))
9832 return(-ix);
9833 ix += 2;
9834 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9835 if (ix + 3 > len) return(ix);
9836 if (((utf[ix+1] & 0xc0) != 0x80) ||
9837 ((utf[ix+2] & 0xc0) != 0x80))
9838 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009839 codepoint = (utf[ix] & 0xf) << 12;
9840 codepoint |= (utf[ix+1] & 0x3f) << 6;
9841 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009842 if (!xmlIsCharQ(codepoint))
9843 return(-ix);
9844 ix += 3;
9845 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9846 if (ix + 4 > len) return(ix);
9847 if (((utf[ix+1] & 0xc0) != 0x80) ||
9848 ((utf[ix+2] & 0xc0) != 0x80) ||
9849 ((utf[ix+3] & 0xc0) != 0x80))
9850 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009851 codepoint = (utf[ix] & 0x7) << 18;
9852 codepoint |= (utf[ix+1] & 0x3f) << 12;
9853 codepoint |= (utf[ix+2] & 0x3f) << 6;
9854 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009855 if (!xmlIsCharQ(codepoint))
9856 return(-ix);
9857 ix += 4;
9858 } else /* unknown encoding */
9859 return(-ix);
9860 }
9861 return(ix);
9862}
9863
9864/**
Owen Taylor3473f882001-02-23 17:55:21 +00009865 * xmlParseTryOrFinish:
9866 * @ctxt: an XML parser context
9867 * @terminate: last chunk indicator
9868 *
9869 * Try to progress on parsing
9870 *
9871 * Returns zero if no parsing was possible
9872 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009873static int
Owen Taylor3473f882001-02-23 17:55:21 +00009874xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9875 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009876 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009877 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009878 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009879
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009880 if (ctxt->input == NULL)
9881 return(0);
9882
Owen Taylor3473f882001-02-23 17:55:21 +00009883#ifdef DEBUG_PUSH
9884 switch (ctxt->instate) {
9885 case XML_PARSER_EOF:
9886 xmlGenericError(xmlGenericErrorContext,
9887 "PP: try EOF\n"); break;
9888 case XML_PARSER_START:
9889 xmlGenericError(xmlGenericErrorContext,
9890 "PP: try START\n"); break;
9891 case XML_PARSER_MISC:
9892 xmlGenericError(xmlGenericErrorContext,
9893 "PP: try MISC\n");break;
9894 case XML_PARSER_COMMENT:
9895 xmlGenericError(xmlGenericErrorContext,
9896 "PP: try COMMENT\n");break;
9897 case XML_PARSER_PROLOG:
9898 xmlGenericError(xmlGenericErrorContext,
9899 "PP: try PROLOG\n");break;
9900 case XML_PARSER_START_TAG:
9901 xmlGenericError(xmlGenericErrorContext,
9902 "PP: try START_TAG\n");break;
9903 case XML_PARSER_CONTENT:
9904 xmlGenericError(xmlGenericErrorContext,
9905 "PP: try CONTENT\n");break;
9906 case XML_PARSER_CDATA_SECTION:
9907 xmlGenericError(xmlGenericErrorContext,
9908 "PP: try CDATA_SECTION\n");break;
9909 case XML_PARSER_END_TAG:
9910 xmlGenericError(xmlGenericErrorContext,
9911 "PP: try END_TAG\n");break;
9912 case XML_PARSER_ENTITY_DECL:
9913 xmlGenericError(xmlGenericErrorContext,
9914 "PP: try ENTITY_DECL\n");break;
9915 case XML_PARSER_ENTITY_VALUE:
9916 xmlGenericError(xmlGenericErrorContext,
9917 "PP: try ENTITY_VALUE\n");break;
9918 case XML_PARSER_ATTRIBUTE_VALUE:
9919 xmlGenericError(xmlGenericErrorContext,
9920 "PP: try ATTRIBUTE_VALUE\n");break;
9921 case XML_PARSER_DTD:
9922 xmlGenericError(xmlGenericErrorContext,
9923 "PP: try DTD\n");break;
9924 case XML_PARSER_EPILOG:
9925 xmlGenericError(xmlGenericErrorContext,
9926 "PP: try EPILOG\n");break;
9927 case XML_PARSER_PI:
9928 xmlGenericError(xmlGenericErrorContext,
9929 "PP: try PI\n");break;
9930 case XML_PARSER_IGNORE:
9931 xmlGenericError(xmlGenericErrorContext,
9932 "PP: try IGNORE\n");break;
9933 }
9934#endif
9935
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009936 if ((ctxt->input != NULL) &&
9937 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009938 xmlSHRINK(ctxt);
9939 ctxt->checkIndex = 0;
9940 }
9941 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009942
Daniel Veillarda880b122003-04-21 21:36:41 +00009943 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009944 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009945 return(0);
9946
9947
Owen Taylor3473f882001-02-23 17:55:21 +00009948 /*
9949 * Pop-up of finished entities.
9950 */
9951 while ((RAW == 0) && (ctxt->inputNr > 1))
9952 xmlPopInput(ctxt);
9953
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009954 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009955 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009956 avail = ctxt->input->length -
9957 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009958 else {
9959 /*
9960 * If we are operating on converted input, try to flush
9961 * remainng chars to avoid them stalling in the non-converted
9962 * buffer.
9963 */
9964 if ((ctxt->input->buf->raw != NULL) &&
9965 (ctxt->input->buf->raw->use > 0)) {
9966 int base = ctxt->input->base -
9967 ctxt->input->buf->buffer->content;
9968 int current = ctxt->input->cur - ctxt->input->base;
9969
9970 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9971 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9972 ctxt->input->cur = ctxt->input->base + current;
9973 ctxt->input->end =
9974 &ctxt->input->buf->buffer->content[
9975 ctxt->input->buf->buffer->use];
9976 }
9977 avail = ctxt->input->buf->buffer->use -
9978 (ctxt->input->cur - ctxt->input->base);
9979 }
Owen Taylor3473f882001-02-23 17:55:21 +00009980 if (avail < 1)
9981 goto done;
9982 switch (ctxt->instate) {
9983 case XML_PARSER_EOF:
9984 /*
9985 * Document parsing is done !
9986 */
9987 goto done;
9988 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009989 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9990 xmlChar start[4];
9991 xmlCharEncoding enc;
9992
9993 /*
9994 * Very first chars read from the document flow.
9995 */
9996 if (avail < 4)
9997 goto done;
9998
9999 /*
10000 * Get the 4 first bytes and decode the charset
10001 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010002 * plug some encoding conversion routines,
10003 * else xmlSwitchEncoding will set to (default)
10004 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010005 */
10006 start[0] = RAW;
10007 start[1] = NXT(1);
10008 start[2] = NXT(2);
10009 start[3] = NXT(3);
10010 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010011 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010012 break;
10013 }
Owen Taylor3473f882001-02-23 17:55:21 +000010014
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010015 if (avail < 2)
10016 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010017 cur = ctxt->input->cur[0];
10018 next = ctxt->input->cur[1];
10019 if (cur == 0) {
10020 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10021 ctxt->sax->setDocumentLocator(ctxt->userData,
10022 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010023 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010024 ctxt->instate = XML_PARSER_EOF;
10025#ifdef DEBUG_PUSH
10026 xmlGenericError(xmlGenericErrorContext,
10027 "PP: entering EOF\n");
10028#endif
10029 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10030 ctxt->sax->endDocument(ctxt->userData);
10031 goto done;
10032 }
10033 if ((cur == '<') && (next == '?')) {
10034 /* PI or XML decl */
10035 if (avail < 5) return(ret);
10036 if ((!terminate) &&
10037 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10038 return(ret);
10039 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10040 ctxt->sax->setDocumentLocator(ctxt->userData,
10041 &xmlDefaultSAXLocator);
10042 if ((ctxt->input->cur[2] == 'x') &&
10043 (ctxt->input->cur[3] == 'm') &&
10044 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010045 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010046 ret += 5;
10047#ifdef DEBUG_PUSH
10048 xmlGenericError(xmlGenericErrorContext,
10049 "PP: Parsing XML Decl\n");
10050#endif
10051 xmlParseXMLDecl(ctxt);
10052 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10053 /*
10054 * The XML REC instructs us to stop parsing right
10055 * here
10056 */
10057 ctxt->instate = XML_PARSER_EOF;
10058 return(0);
10059 }
10060 ctxt->standalone = ctxt->input->standalone;
10061 if ((ctxt->encoding == NULL) &&
10062 (ctxt->input->encoding != NULL))
10063 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10064 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10065 (!ctxt->disableSAX))
10066 ctxt->sax->startDocument(ctxt->userData);
10067 ctxt->instate = XML_PARSER_MISC;
10068#ifdef DEBUG_PUSH
10069 xmlGenericError(xmlGenericErrorContext,
10070 "PP: entering MISC\n");
10071#endif
10072 } else {
10073 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10074 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10075 (!ctxt->disableSAX))
10076 ctxt->sax->startDocument(ctxt->userData);
10077 ctxt->instate = XML_PARSER_MISC;
10078#ifdef DEBUG_PUSH
10079 xmlGenericError(xmlGenericErrorContext,
10080 "PP: entering MISC\n");
10081#endif
10082 }
10083 } else {
10084 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10085 ctxt->sax->setDocumentLocator(ctxt->userData,
10086 &xmlDefaultSAXLocator);
10087 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010088 if (ctxt->version == NULL) {
10089 xmlErrMemory(ctxt, NULL);
10090 break;
10091 }
Owen Taylor3473f882001-02-23 17:55:21 +000010092 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10093 (!ctxt->disableSAX))
10094 ctxt->sax->startDocument(ctxt->userData);
10095 ctxt->instate = XML_PARSER_MISC;
10096#ifdef DEBUG_PUSH
10097 xmlGenericError(xmlGenericErrorContext,
10098 "PP: entering MISC\n");
10099#endif
10100 }
10101 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010102 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010103 const xmlChar *name;
10104 const xmlChar *prefix;
10105 const xmlChar *URI;
10106 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010107
10108 if ((avail < 2) && (ctxt->inputNr == 1))
10109 goto done;
10110 cur = ctxt->input->cur[0];
10111 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010112 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010113 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010114 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10115 ctxt->sax->endDocument(ctxt->userData);
10116 goto done;
10117 }
10118 if (!terminate) {
10119 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010120 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010121 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010122 goto done;
10123 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10124 goto done;
10125 }
10126 }
10127 if (ctxt->spaceNr == 0)
10128 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010129 else if (*ctxt->space == -2)
10130 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010131 else
10132 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010133#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010134 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010135#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010136 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010137#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010138 else
10139 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010140#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010141 if (name == NULL) {
10142 spacePop(ctxt);
10143 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010144 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10145 ctxt->sax->endDocument(ctxt->userData);
10146 goto done;
10147 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010148#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010149 /*
10150 * [ VC: Root Element Type ]
10151 * The Name in the document type declaration must match
10152 * the element type of the root element.
10153 */
10154 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10155 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10156 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010157#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010158
10159 /*
10160 * Check for an Empty Element.
10161 */
10162 if ((RAW == '/') && (NXT(1) == '>')) {
10163 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010164
10165 if (ctxt->sax2) {
10166 if ((ctxt->sax != NULL) &&
10167 (ctxt->sax->endElementNs != NULL) &&
10168 (!ctxt->disableSAX))
10169 ctxt->sax->endElementNs(ctxt->userData, name,
10170 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010171 if (ctxt->nsNr - nsNr > 0)
10172 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010173#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010174 } else {
10175 if ((ctxt->sax != NULL) &&
10176 (ctxt->sax->endElement != NULL) &&
10177 (!ctxt->disableSAX))
10178 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010179#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010180 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010181 spacePop(ctxt);
10182 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010183 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010184 } else {
10185 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010186 }
10187 break;
10188 }
10189 if (RAW == '>') {
10190 NEXT;
10191 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010192 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010193 "Couldn't find end of Start Tag %s\n",
10194 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010195 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010196 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010197 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010198 if (ctxt->sax2)
10199 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010200#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010201 else
10202 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010203#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010204
Daniel Veillarda880b122003-04-21 21:36:41 +000010205 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010206 break;
10207 }
10208 case XML_PARSER_CONTENT: {
10209 const xmlChar *test;
10210 unsigned int cons;
10211 if ((avail < 2) && (ctxt->inputNr == 1))
10212 goto done;
10213 cur = ctxt->input->cur[0];
10214 next = ctxt->input->cur[1];
10215
10216 test = CUR_PTR;
10217 cons = ctxt->input->consumed;
10218 if ((cur == '<') && (next == '/')) {
10219 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010220 break;
10221 } else if ((cur == '<') && (next == '?')) {
10222 if ((!terminate) &&
10223 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10224 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010225 xmlParsePI(ctxt);
10226 } else if ((cur == '<') && (next != '!')) {
10227 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010228 break;
10229 } else if ((cur == '<') && (next == '!') &&
10230 (ctxt->input->cur[2] == '-') &&
10231 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010232 int term;
10233
10234 if (avail < 4)
10235 goto done;
10236 ctxt->input->cur += 4;
10237 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10238 ctxt->input->cur -= 4;
10239 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010240 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010241 xmlParseComment(ctxt);
10242 ctxt->instate = XML_PARSER_CONTENT;
10243 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10244 (ctxt->input->cur[2] == '[') &&
10245 (ctxt->input->cur[3] == 'C') &&
10246 (ctxt->input->cur[4] == 'D') &&
10247 (ctxt->input->cur[5] == 'A') &&
10248 (ctxt->input->cur[6] == 'T') &&
10249 (ctxt->input->cur[7] == 'A') &&
10250 (ctxt->input->cur[8] == '[')) {
10251 SKIP(9);
10252 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010253 break;
10254 } else if ((cur == '<') && (next == '!') &&
10255 (avail < 9)) {
10256 goto done;
10257 } else if (cur == '&') {
10258 if ((!terminate) &&
10259 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10260 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010261 xmlParseReference(ctxt);
10262 } else {
10263 /* TODO Avoid the extra copy, handle directly !!! */
10264 /*
10265 * Goal of the following test is:
10266 * - minimize calls to the SAX 'character' callback
10267 * when they are mergeable
10268 * - handle an problem for isBlank when we only parse
10269 * a sequence of blank chars and the next one is
10270 * not available to check against '<' presence.
10271 * - tries to homogenize the differences in SAX
10272 * callbacks between the push and pull versions
10273 * of the parser.
10274 */
10275 if ((ctxt->inputNr == 1) &&
10276 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10277 if (!terminate) {
10278 if (ctxt->progressive) {
10279 if ((lastlt == NULL) ||
10280 (ctxt->input->cur > lastlt))
10281 goto done;
10282 } else if (xmlParseLookupSequence(ctxt,
10283 '<', 0, 0) < 0) {
10284 goto done;
10285 }
10286 }
10287 }
10288 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010289 xmlParseCharData(ctxt, 0);
10290 }
10291 /*
10292 * Pop-up of finished entities.
10293 */
10294 while ((RAW == 0) && (ctxt->inputNr > 1))
10295 xmlPopInput(ctxt);
10296 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010297 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10298 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010299 ctxt->instate = XML_PARSER_EOF;
10300 break;
10301 }
10302 break;
10303 }
10304 case XML_PARSER_END_TAG:
10305 if (avail < 2)
10306 goto done;
10307 if (!terminate) {
10308 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010309 /* > can be found unescaped in attribute values */
10310 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010311 goto done;
10312 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10313 goto done;
10314 }
10315 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010316 if (ctxt->sax2) {
10317 xmlParseEndTag2(ctxt,
10318 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10319 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010320 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010321 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010322 }
10323#ifdef LIBXML_SAX1_ENABLED
10324 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010325 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010326#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010327 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010328 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010329 } else {
10330 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010331 }
10332 break;
10333 case XML_PARSER_CDATA_SECTION: {
10334 /*
10335 * The Push mode need to have the SAX callback for
10336 * cdataBlock merge back contiguous callbacks.
10337 */
10338 int base;
10339
10340 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10341 if (base < 0) {
10342 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010343 int tmp;
10344
10345 tmp = xmlCheckCdataPush(ctxt->input->cur,
10346 XML_PARSER_BIG_BUFFER_SIZE);
10347 if (tmp < 0) {
10348 tmp = -tmp;
10349 ctxt->input->cur += tmp;
10350 goto encoding_error;
10351 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010352 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10353 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010354 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010355 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010356 else if (ctxt->sax->characters != NULL)
10357 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010358 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010359 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010360 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010361 ctxt->checkIndex = 0;
10362 }
10363 goto done;
10364 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010365 int tmp;
10366
10367 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10368 if ((tmp < 0) || (tmp != base)) {
10369 tmp = -tmp;
10370 ctxt->input->cur += tmp;
10371 goto encoding_error;
10372 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010373 if ((ctxt->sax != NULL) && (base == 0) &&
10374 (ctxt->sax->cdataBlock != NULL) &&
10375 (!ctxt->disableSAX)) {
10376 /*
10377 * Special case to provide identical behaviour
10378 * between pull and push parsers on enpty CDATA
10379 * sections
10380 */
10381 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10382 (!strncmp((const char *)&ctxt->input->cur[-9],
10383 "<![CDATA[", 9)))
10384 ctxt->sax->cdataBlock(ctxt->userData,
10385 BAD_CAST "", 0);
10386 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010387 (!ctxt->disableSAX)) {
10388 if (ctxt->sax->cdataBlock != NULL)
10389 ctxt->sax->cdataBlock(ctxt->userData,
10390 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010391 else if (ctxt->sax->characters != NULL)
10392 ctxt->sax->characters(ctxt->userData,
10393 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010394 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010395 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010396 ctxt->checkIndex = 0;
10397 ctxt->instate = XML_PARSER_CONTENT;
10398#ifdef DEBUG_PUSH
10399 xmlGenericError(xmlGenericErrorContext,
10400 "PP: entering CONTENT\n");
10401#endif
10402 }
10403 break;
10404 }
Owen Taylor3473f882001-02-23 17:55:21 +000010405 case XML_PARSER_MISC:
10406 SKIP_BLANKS;
10407 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010408 avail = ctxt->input->length -
10409 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010410 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010411 avail = ctxt->input->buf->buffer->use -
10412 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010413 if (avail < 2)
10414 goto done;
10415 cur = ctxt->input->cur[0];
10416 next = ctxt->input->cur[1];
10417 if ((cur == '<') && (next == '?')) {
10418 if ((!terminate) &&
10419 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10420 goto done;
10421#ifdef DEBUG_PUSH
10422 xmlGenericError(xmlGenericErrorContext,
10423 "PP: Parsing PI\n");
10424#endif
10425 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010426 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010427 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010428 (ctxt->input->cur[2] == '-') &&
10429 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010430 if ((!terminate) &&
10431 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10432 goto done;
10433#ifdef DEBUG_PUSH
10434 xmlGenericError(xmlGenericErrorContext,
10435 "PP: Parsing Comment\n");
10436#endif
10437 xmlParseComment(ctxt);
10438 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010439 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010440 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010441 (ctxt->input->cur[2] == 'D') &&
10442 (ctxt->input->cur[3] == 'O') &&
10443 (ctxt->input->cur[4] == 'C') &&
10444 (ctxt->input->cur[5] == 'T') &&
10445 (ctxt->input->cur[6] == 'Y') &&
10446 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010447 (ctxt->input->cur[8] == 'E')) {
10448 if ((!terminate) &&
10449 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10450 goto done;
10451#ifdef DEBUG_PUSH
10452 xmlGenericError(xmlGenericErrorContext,
10453 "PP: Parsing internal subset\n");
10454#endif
10455 ctxt->inSubset = 1;
10456 xmlParseDocTypeDecl(ctxt);
10457 if (RAW == '[') {
10458 ctxt->instate = XML_PARSER_DTD;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: entering DTD\n");
10462#endif
10463 } else {
10464 /*
10465 * Create and update the external subset.
10466 */
10467 ctxt->inSubset = 2;
10468 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10469 (ctxt->sax->externalSubset != NULL))
10470 ctxt->sax->externalSubset(ctxt->userData,
10471 ctxt->intSubName, ctxt->extSubSystem,
10472 ctxt->extSubURI);
10473 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010474 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 ctxt->instate = XML_PARSER_PROLOG;
10476#ifdef DEBUG_PUSH
10477 xmlGenericError(xmlGenericErrorContext,
10478 "PP: entering PROLOG\n");
10479#endif
10480 }
10481 } else if ((cur == '<') && (next == '!') &&
10482 (avail < 9)) {
10483 goto done;
10484 } else {
10485 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010486 ctxt->progressive = 1;
10487 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010488#ifdef DEBUG_PUSH
10489 xmlGenericError(xmlGenericErrorContext,
10490 "PP: entering START_TAG\n");
10491#endif
10492 }
10493 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010494 case XML_PARSER_PROLOG:
10495 SKIP_BLANKS;
10496 if (ctxt->input->buf == NULL)
10497 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10498 else
10499 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10500 if (avail < 2)
10501 goto done;
10502 cur = ctxt->input->cur[0];
10503 next = ctxt->input->cur[1];
10504 if ((cur == '<') && (next == '?')) {
10505 if ((!terminate) &&
10506 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10507 goto done;
10508#ifdef DEBUG_PUSH
10509 xmlGenericError(xmlGenericErrorContext,
10510 "PP: Parsing PI\n");
10511#endif
10512 xmlParsePI(ctxt);
10513 } else if ((cur == '<') && (next == '!') &&
10514 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10515 if ((!terminate) &&
10516 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10517 goto done;
10518#ifdef DEBUG_PUSH
10519 xmlGenericError(xmlGenericErrorContext,
10520 "PP: Parsing Comment\n");
10521#endif
10522 xmlParseComment(ctxt);
10523 ctxt->instate = XML_PARSER_PROLOG;
10524 } else if ((cur == '<') && (next == '!') &&
10525 (avail < 4)) {
10526 goto done;
10527 } else {
10528 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010529 if (ctxt->progressive == 0)
10530 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010531 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010532#ifdef DEBUG_PUSH
10533 xmlGenericError(xmlGenericErrorContext,
10534 "PP: entering START_TAG\n");
10535#endif
10536 }
10537 break;
10538 case XML_PARSER_EPILOG:
10539 SKIP_BLANKS;
10540 if (ctxt->input->buf == NULL)
10541 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10542 else
10543 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10544 if (avail < 2)
10545 goto done;
10546 cur = ctxt->input->cur[0];
10547 next = ctxt->input->cur[1];
10548 if ((cur == '<') && (next == '?')) {
10549 if ((!terminate) &&
10550 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10551 goto done;
10552#ifdef DEBUG_PUSH
10553 xmlGenericError(xmlGenericErrorContext,
10554 "PP: Parsing PI\n");
10555#endif
10556 xmlParsePI(ctxt);
10557 ctxt->instate = XML_PARSER_EPILOG;
10558 } else if ((cur == '<') && (next == '!') &&
10559 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10560 if ((!terminate) &&
10561 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10562 goto done;
10563#ifdef DEBUG_PUSH
10564 xmlGenericError(xmlGenericErrorContext,
10565 "PP: Parsing Comment\n");
10566#endif
10567 xmlParseComment(ctxt);
10568 ctxt->instate = XML_PARSER_EPILOG;
10569 } else if ((cur == '<') && (next == '!') &&
10570 (avail < 4)) {
10571 goto done;
10572 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010573 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010574 ctxt->instate = XML_PARSER_EOF;
10575#ifdef DEBUG_PUSH
10576 xmlGenericError(xmlGenericErrorContext,
10577 "PP: entering EOF\n");
10578#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010579 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010580 ctxt->sax->endDocument(ctxt->userData);
10581 goto done;
10582 }
10583 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010584 case XML_PARSER_DTD: {
10585 /*
10586 * Sorry but progressive parsing of the internal subset
10587 * is not expected to be supported. We first check that
10588 * the full content of the internal subset is available and
10589 * the parsing is launched only at that point.
10590 * Internal subset ends up with "']' S? '>'" in an unescaped
10591 * section and not in a ']]>' sequence which are conditional
10592 * sections (whoever argued to keep that crap in XML deserve
10593 * a place in hell !).
10594 */
10595 int base, i;
10596 xmlChar *buf;
10597 xmlChar quote = 0;
10598
10599 base = ctxt->input->cur - ctxt->input->base;
10600 if (base < 0) return(0);
10601 if (ctxt->checkIndex > base)
10602 base = ctxt->checkIndex;
10603 buf = ctxt->input->buf->buffer->content;
10604 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10605 base++) {
10606 if (quote != 0) {
10607 if (buf[base] == quote)
10608 quote = 0;
10609 continue;
10610 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010611 if ((quote == 0) && (buf[base] == '<')) {
10612 int found = 0;
10613 /* special handling of comments */
10614 if (((unsigned int) base + 4 <
10615 ctxt->input->buf->buffer->use) &&
10616 (buf[base + 1] == '!') &&
10617 (buf[base + 2] == '-') &&
10618 (buf[base + 3] == '-')) {
10619 for (;(unsigned int) base + 3 <
10620 ctxt->input->buf->buffer->use; base++) {
10621 if ((buf[base] == '-') &&
10622 (buf[base + 1] == '-') &&
10623 (buf[base + 2] == '>')) {
10624 found = 1;
10625 base += 2;
10626 break;
10627 }
10628 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010629 if (!found) {
10630#if 0
10631 fprintf(stderr, "unfinished comment\n");
10632#endif
10633 break; /* for */
10634 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010635 continue;
10636 }
10637 }
Owen Taylor3473f882001-02-23 17:55:21 +000010638 if (buf[base] == '"') {
10639 quote = '"';
10640 continue;
10641 }
10642 if (buf[base] == '\'') {
10643 quote = '\'';
10644 continue;
10645 }
10646 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010647#if 0
10648 fprintf(stderr, "%c%c%c%c: ", buf[base],
10649 buf[base + 1], buf[base + 2], buf[base + 3]);
10650#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010651 if ((unsigned int) base +1 >=
10652 ctxt->input->buf->buffer->use)
10653 break;
10654 if (buf[base + 1] == ']') {
10655 /* conditional crap, skip both ']' ! */
10656 base++;
10657 continue;
10658 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010659 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010660 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10661 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010662 if (buf[base + i] == '>') {
10663#if 0
10664 fprintf(stderr, "found\n");
10665#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010666 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010667 }
10668 if (!IS_BLANK_CH(buf[base + i])) {
10669#if 0
10670 fprintf(stderr, "not found\n");
10671#endif
10672 goto not_end_of_int_subset;
10673 }
Owen Taylor3473f882001-02-23 17:55:21 +000010674 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010675#if 0
10676 fprintf(stderr, "end of stream\n");
10677#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010678 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010679
Owen Taylor3473f882001-02-23 17:55:21 +000010680 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010681not_end_of_int_subset:
10682 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010683 }
10684 /*
10685 * We didn't found the end of the Internal subset
10686 */
Owen Taylor3473f882001-02-23 17:55:21 +000010687#ifdef DEBUG_PUSH
10688 if (next == 0)
10689 xmlGenericError(xmlGenericErrorContext,
10690 "PP: lookup of int subset end filed\n");
10691#endif
10692 goto done;
10693
10694found_end_int_subset:
10695 xmlParseInternalSubset(ctxt);
10696 ctxt->inSubset = 2;
10697 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10698 (ctxt->sax->externalSubset != NULL))
10699 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10700 ctxt->extSubSystem, ctxt->extSubURI);
10701 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010702 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010703 ctxt->instate = XML_PARSER_PROLOG;
10704 ctxt->checkIndex = 0;
10705#ifdef DEBUG_PUSH
10706 xmlGenericError(xmlGenericErrorContext,
10707 "PP: entering PROLOG\n");
10708#endif
10709 break;
10710 }
10711 case XML_PARSER_COMMENT:
10712 xmlGenericError(xmlGenericErrorContext,
10713 "PP: internal error, state == COMMENT\n");
10714 ctxt->instate = XML_PARSER_CONTENT;
10715#ifdef DEBUG_PUSH
10716 xmlGenericError(xmlGenericErrorContext,
10717 "PP: entering CONTENT\n");
10718#endif
10719 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010720 case XML_PARSER_IGNORE:
10721 xmlGenericError(xmlGenericErrorContext,
10722 "PP: internal error, state == IGNORE");
10723 ctxt->instate = XML_PARSER_DTD;
10724#ifdef DEBUG_PUSH
10725 xmlGenericError(xmlGenericErrorContext,
10726 "PP: entering DTD\n");
10727#endif
10728 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010729 case XML_PARSER_PI:
10730 xmlGenericError(xmlGenericErrorContext,
10731 "PP: internal error, state == PI\n");
10732 ctxt->instate = XML_PARSER_CONTENT;
10733#ifdef DEBUG_PUSH
10734 xmlGenericError(xmlGenericErrorContext,
10735 "PP: entering CONTENT\n");
10736#endif
10737 break;
10738 case XML_PARSER_ENTITY_DECL:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: internal error, state == ENTITY_DECL\n");
10741 ctxt->instate = XML_PARSER_DTD;
10742#ifdef DEBUG_PUSH
10743 xmlGenericError(xmlGenericErrorContext,
10744 "PP: entering DTD\n");
10745#endif
10746 break;
10747 case XML_PARSER_ENTITY_VALUE:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: internal error, state == ENTITY_VALUE\n");
10750 ctxt->instate = XML_PARSER_CONTENT;
10751#ifdef DEBUG_PUSH
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: entering DTD\n");
10754#endif
10755 break;
10756 case XML_PARSER_ATTRIBUTE_VALUE:
10757 xmlGenericError(xmlGenericErrorContext,
10758 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10759 ctxt->instate = XML_PARSER_START_TAG;
10760#ifdef DEBUG_PUSH
10761 xmlGenericError(xmlGenericErrorContext,
10762 "PP: entering START_TAG\n");
10763#endif
10764 break;
10765 case XML_PARSER_SYSTEM_LITERAL:
10766 xmlGenericError(xmlGenericErrorContext,
10767 "PP: internal error, state == SYSTEM_LITERAL\n");
10768 ctxt->instate = XML_PARSER_START_TAG;
10769#ifdef DEBUG_PUSH
10770 xmlGenericError(xmlGenericErrorContext,
10771 "PP: entering START_TAG\n");
10772#endif
10773 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010774 case XML_PARSER_PUBLIC_LITERAL:
10775 xmlGenericError(xmlGenericErrorContext,
10776 "PP: internal error, state == PUBLIC_LITERAL\n");
10777 ctxt->instate = XML_PARSER_START_TAG;
10778#ifdef DEBUG_PUSH
10779 xmlGenericError(xmlGenericErrorContext,
10780 "PP: entering START_TAG\n");
10781#endif
10782 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010783 }
10784 }
10785done:
10786#ifdef DEBUG_PUSH
10787 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10788#endif
10789 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010790encoding_error:
10791 {
10792 char buffer[150];
10793
10794 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10795 ctxt->input->cur[0], ctxt->input->cur[1],
10796 ctxt->input->cur[2], ctxt->input->cur[3]);
10797 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10798 "Input is not proper UTF-8, indicate encoding !\n%s",
10799 BAD_CAST buffer, NULL);
10800 }
10801 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010802}
10803
10804/**
Owen Taylor3473f882001-02-23 17:55:21 +000010805 * xmlParseChunk:
10806 * @ctxt: an XML parser context
10807 * @chunk: an char array
10808 * @size: the size in byte of the chunk
10809 * @terminate: last chunk indicator
10810 *
10811 * Parse a Chunk of memory
10812 *
10813 * Returns zero if no error, the xmlParserErrors otherwise.
10814 */
10815int
10816xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10817 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010818 int end_in_lf = 0;
10819
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010820 if (ctxt == NULL)
10821 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010822 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010823 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010824 if (ctxt->instate == XML_PARSER_START)
10825 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010826 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10827 (chunk[size - 1] == '\r')) {
10828 end_in_lf = 1;
10829 size--;
10830 }
Owen Taylor3473f882001-02-23 17:55:21 +000010831 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10832 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10833 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10834 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010835 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010836
William M. Bracka3215c72004-07-31 16:24:01 +000010837 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10838 if (res < 0) {
10839 ctxt->errNo = XML_PARSER_EOF;
10840 ctxt->disableSAX = 1;
10841 return (XML_PARSER_EOF);
10842 }
Owen Taylor3473f882001-02-23 17:55:21 +000010843 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10844 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010845 ctxt->input->end =
10846 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010847#ifdef DEBUG_PUSH
10848 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10849#endif
10850
Owen Taylor3473f882001-02-23 17:55:21 +000010851 } else if (ctxt->instate != XML_PARSER_EOF) {
10852 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10853 xmlParserInputBufferPtr in = ctxt->input->buf;
10854 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10855 (in->raw != NULL)) {
10856 int nbchars;
10857
10858 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10859 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010860 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010861 xmlGenericError(xmlGenericErrorContext,
10862 "xmlParseChunk: encoder error\n");
10863 return(XML_ERR_INVALID_ENCODING);
10864 }
10865 }
10866 }
10867 }
10868 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010869 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10870 (ctxt->input->buf != NULL)) {
10871 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10872 }
Daniel Veillard14412512005-01-21 23:53:26 +000010873 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010874 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010875 if (terminate) {
10876 /*
10877 * Check for termination
10878 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010879 int avail = 0;
10880
10881 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010882 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010883 avail = ctxt->input->length -
10884 (ctxt->input->cur - ctxt->input->base);
10885 else
10886 avail = ctxt->input->buf->buffer->use -
10887 (ctxt->input->cur - ctxt->input->base);
10888 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010889
Owen Taylor3473f882001-02-23 17:55:21 +000010890 if ((ctxt->instate != XML_PARSER_EOF) &&
10891 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010892 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010893 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010894 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010895 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010896 }
Owen Taylor3473f882001-02-23 17:55:21 +000010897 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010899 ctxt->sax->endDocument(ctxt->userData);
10900 }
10901 ctxt->instate = XML_PARSER_EOF;
10902 }
10903 return((xmlParserErrors) ctxt->errNo);
10904}
10905
10906/************************************************************************
10907 * *
10908 * I/O front end functions to the parser *
10909 * *
10910 ************************************************************************/
10911
10912/**
Owen Taylor3473f882001-02-23 17:55:21 +000010913 * xmlCreatePushParserCtxt:
10914 * @sax: a SAX handler
10915 * @user_data: The user data returned on SAX callbacks
10916 * @chunk: a pointer to an array of chars
10917 * @size: number of chars in the array
10918 * @filename: an optional file name or URI
10919 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010920 * Create a parser context for using the XML parser in push mode.
10921 * If @buffer and @size are non-NULL, the data is used to detect
10922 * the encoding. The remaining characters will be parsed so they
10923 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010924 * To allow content encoding detection, @size should be >= 4
10925 * The value of @filename is used for fetching external entities
10926 * and error/warning reports.
10927 *
10928 * Returns the new parser context or NULL
10929 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010930
Owen Taylor3473f882001-02-23 17:55:21 +000010931xmlParserCtxtPtr
10932xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10933 const char *chunk, int size, const char *filename) {
10934 xmlParserCtxtPtr ctxt;
10935 xmlParserInputPtr inputStream;
10936 xmlParserInputBufferPtr buf;
10937 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10938
10939 /*
10940 * plug some encoding conversion routines
10941 */
10942 if ((chunk != NULL) && (size >= 4))
10943 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10944
10945 buf = xmlAllocParserInputBuffer(enc);
10946 if (buf == NULL) return(NULL);
10947
10948 ctxt = xmlNewParserCtxt();
10949 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010950 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010951 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010952 return(NULL);
10953 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010954 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010955 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10956 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010957 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010958 xmlFreeParserInputBuffer(buf);
10959 xmlFreeParserCtxt(ctxt);
10960 return(NULL);
10961 }
Owen Taylor3473f882001-02-23 17:55:21 +000010962 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010963#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010964 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010965#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010966 xmlFree(ctxt->sax);
10967 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10968 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010969 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010970 xmlFreeParserInputBuffer(buf);
10971 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010972 return(NULL);
10973 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010974 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10975 if (sax->initialized == XML_SAX2_MAGIC)
10976 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10977 else
10978 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010979 if (user_data != NULL)
10980 ctxt->userData = user_data;
10981 }
10982 if (filename == NULL) {
10983 ctxt->directory = NULL;
10984 } else {
10985 ctxt->directory = xmlParserGetDirectory(filename);
10986 }
10987
10988 inputStream = xmlNewInputStream(ctxt);
10989 if (inputStream == NULL) {
10990 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010991 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010992 return(NULL);
10993 }
10994
10995 if (filename == NULL)
10996 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010997 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010998 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010999 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011000 if (inputStream->filename == NULL) {
11001 xmlFreeParserCtxt(ctxt);
11002 xmlFreeParserInputBuffer(buf);
11003 return(NULL);
11004 }
11005 }
Owen Taylor3473f882001-02-23 17:55:21 +000011006 inputStream->buf = buf;
11007 inputStream->base = inputStream->buf->buffer->content;
11008 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011009 inputStream->end =
11010 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011011
11012 inputPush(ctxt, inputStream);
11013
William M. Brack3a1cd212005-02-11 14:35:54 +000011014 /*
11015 * If the caller didn't provide an initial 'chunk' for determining
11016 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11017 * that it can be automatically determined later
11018 */
11019 if ((size == 0) || (chunk == NULL)) {
11020 ctxt->charset = XML_CHAR_ENCODING_NONE;
11021 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011022 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11023 int cur = ctxt->input->cur - ctxt->input->base;
11024
Owen Taylor3473f882001-02-23 17:55:21 +000011025 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011026
11027 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11028 ctxt->input->cur = ctxt->input->base + cur;
11029 ctxt->input->end =
11030 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011031#ifdef DEBUG_PUSH
11032 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11033#endif
11034 }
11035
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011036 if (enc != XML_CHAR_ENCODING_NONE) {
11037 xmlSwitchEncoding(ctxt, enc);
11038 }
11039
Owen Taylor3473f882001-02-23 17:55:21 +000011040 return(ctxt);
11041}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011042#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011043
11044/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011045 * xmlStopParser:
11046 * @ctxt: an XML parser context
11047 *
11048 * Blocks further parser processing
11049 */
11050void
11051xmlStopParser(xmlParserCtxtPtr ctxt) {
11052 if (ctxt == NULL)
11053 return;
11054 ctxt->instate = XML_PARSER_EOF;
11055 ctxt->disableSAX = 1;
11056 if (ctxt->input != NULL) {
11057 ctxt->input->cur = BAD_CAST"";
11058 ctxt->input->base = ctxt->input->cur;
11059 }
11060}
11061
11062/**
Owen Taylor3473f882001-02-23 17:55:21 +000011063 * xmlCreateIOParserCtxt:
11064 * @sax: a SAX handler
11065 * @user_data: The user data returned on SAX callbacks
11066 * @ioread: an I/O read function
11067 * @ioclose: an I/O close function
11068 * @ioctx: an I/O handler
11069 * @enc: the charset encoding if known
11070 *
11071 * Create a parser context for using the XML parser with an existing
11072 * I/O stream
11073 *
11074 * Returns the new parser context or NULL
11075 */
11076xmlParserCtxtPtr
11077xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11078 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11079 void *ioctx, xmlCharEncoding enc) {
11080 xmlParserCtxtPtr ctxt;
11081 xmlParserInputPtr inputStream;
11082 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011083
11084 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011085
11086 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11087 if (buf == NULL) return(NULL);
11088
11089 ctxt = xmlNewParserCtxt();
11090 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011091 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011092 return(NULL);
11093 }
11094 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011095#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011096 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011097#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011098 xmlFree(ctxt->sax);
11099 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11100 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011101 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011102 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011103 return(NULL);
11104 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011105 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11106 if (sax->initialized == XML_SAX2_MAGIC)
11107 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11108 else
11109 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011110 if (user_data != NULL)
11111 ctxt->userData = user_data;
11112 }
11113
11114 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11115 if (inputStream == NULL) {
11116 xmlFreeParserCtxt(ctxt);
11117 return(NULL);
11118 }
11119 inputPush(ctxt, inputStream);
11120
11121 return(ctxt);
11122}
11123
Daniel Veillard4432df22003-09-28 18:58:27 +000011124#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011125/************************************************************************
11126 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011127 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011128 * *
11129 ************************************************************************/
11130
11131/**
11132 * xmlIOParseDTD:
11133 * @sax: the SAX handler block or NULL
11134 * @input: an Input Buffer
11135 * @enc: the charset encoding if known
11136 *
11137 * Load and parse a DTD
11138 *
11139 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011140 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011141 */
11142
11143xmlDtdPtr
11144xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11145 xmlCharEncoding enc) {
11146 xmlDtdPtr ret = NULL;
11147 xmlParserCtxtPtr ctxt;
11148 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011149 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011150
11151 if (input == NULL)
11152 return(NULL);
11153
11154 ctxt = xmlNewParserCtxt();
11155 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011156 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011157 return(NULL);
11158 }
11159
11160 /*
11161 * Set-up the SAX context
11162 */
11163 if (sax != NULL) {
11164 if (ctxt->sax != NULL)
11165 xmlFree(ctxt->sax);
11166 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011167 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011168 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011169 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011170
11171 /*
11172 * generate a parser input from the I/O handler
11173 */
11174
Daniel Veillard43caefb2003-12-07 19:32:22 +000011175 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011176 if (pinput == NULL) {
11177 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011178 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011179 xmlFreeParserCtxt(ctxt);
11180 return(NULL);
11181 }
11182
11183 /*
11184 * plug some encoding conversion routines here.
11185 */
11186 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011187 if (enc != XML_CHAR_ENCODING_NONE) {
11188 xmlSwitchEncoding(ctxt, enc);
11189 }
Owen Taylor3473f882001-02-23 17:55:21 +000011190
11191 pinput->filename = NULL;
11192 pinput->line = 1;
11193 pinput->col = 1;
11194 pinput->base = ctxt->input->cur;
11195 pinput->cur = ctxt->input->cur;
11196 pinput->free = NULL;
11197
11198 /*
11199 * let's parse that entity knowing it's an external subset.
11200 */
11201 ctxt->inSubset = 2;
11202 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11203 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11204 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011205
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011206 if ((enc == XML_CHAR_ENCODING_NONE) &&
11207 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011208 /*
11209 * Get the 4 first bytes and decode the charset
11210 * if enc != XML_CHAR_ENCODING_NONE
11211 * plug some encoding conversion routines.
11212 */
11213 start[0] = RAW;
11214 start[1] = NXT(1);
11215 start[2] = NXT(2);
11216 start[3] = NXT(3);
11217 enc = xmlDetectCharEncoding(start, 4);
11218 if (enc != XML_CHAR_ENCODING_NONE) {
11219 xmlSwitchEncoding(ctxt, enc);
11220 }
11221 }
11222
Owen Taylor3473f882001-02-23 17:55:21 +000011223 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11224
11225 if (ctxt->myDoc != NULL) {
11226 if (ctxt->wellFormed) {
11227 ret = ctxt->myDoc->extSubset;
11228 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011229 if (ret != NULL) {
11230 xmlNodePtr tmp;
11231
11232 ret->doc = NULL;
11233 tmp = ret->children;
11234 while (tmp != NULL) {
11235 tmp->doc = NULL;
11236 tmp = tmp->next;
11237 }
11238 }
Owen Taylor3473f882001-02-23 17:55:21 +000011239 } else {
11240 ret = NULL;
11241 }
11242 xmlFreeDoc(ctxt->myDoc);
11243 ctxt->myDoc = NULL;
11244 }
11245 if (sax != NULL) ctxt->sax = NULL;
11246 xmlFreeParserCtxt(ctxt);
11247
11248 return(ret);
11249}
11250
11251/**
11252 * xmlSAXParseDTD:
11253 * @sax: the SAX handler block
11254 * @ExternalID: a NAME* containing the External ID of the DTD
11255 * @SystemID: a NAME* containing the URL to the DTD
11256 *
11257 * Load and parse an external subset.
11258 *
11259 * Returns the resulting xmlDtdPtr or NULL in case of error.
11260 */
11261
11262xmlDtdPtr
11263xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11264 const xmlChar *SystemID) {
11265 xmlDtdPtr ret = NULL;
11266 xmlParserCtxtPtr ctxt;
11267 xmlParserInputPtr input = NULL;
11268 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011269 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011270
11271 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11272
11273 ctxt = xmlNewParserCtxt();
11274 if (ctxt == NULL) {
11275 return(NULL);
11276 }
11277
11278 /*
11279 * Set-up the SAX context
11280 */
11281 if (sax != NULL) {
11282 if (ctxt->sax != NULL)
11283 xmlFree(ctxt->sax);
11284 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011285 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011286 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011287
11288 /*
11289 * Canonicalise the system ID
11290 */
11291 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011292 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011293 xmlFreeParserCtxt(ctxt);
11294 return(NULL);
11295 }
Owen Taylor3473f882001-02-23 17:55:21 +000011296
11297 /*
11298 * Ask the Entity resolver to load the damn thing
11299 */
11300
11301 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011302 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11303 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011304 if (input == NULL) {
11305 if (sax != NULL) ctxt->sax = NULL;
11306 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011307 if (systemIdCanonic != NULL)
11308 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011309 return(NULL);
11310 }
11311
11312 /*
11313 * plug some encoding conversion routines here.
11314 */
11315 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011316 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11317 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11318 xmlSwitchEncoding(ctxt, enc);
11319 }
Owen Taylor3473f882001-02-23 17:55:21 +000011320
11321 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011322 input->filename = (char *) systemIdCanonic;
11323 else
11324 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011325 input->line = 1;
11326 input->col = 1;
11327 input->base = ctxt->input->cur;
11328 input->cur = ctxt->input->cur;
11329 input->free = NULL;
11330
11331 /*
11332 * let's parse that entity knowing it's an external subset.
11333 */
11334 ctxt->inSubset = 2;
11335 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11336 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11337 ExternalID, SystemID);
11338 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11339
11340 if (ctxt->myDoc != NULL) {
11341 if (ctxt->wellFormed) {
11342 ret = ctxt->myDoc->extSubset;
11343 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011344 if (ret != NULL) {
11345 xmlNodePtr tmp;
11346
11347 ret->doc = NULL;
11348 tmp = ret->children;
11349 while (tmp != NULL) {
11350 tmp->doc = NULL;
11351 tmp = tmp->next;
11352 }
11353 }
Owen Taylor3473f882001-02-23 17:55:21 +000011354 } else {
11355 ret = NULL;
11356 }
11357 xmlFreeDoc(ctxt->myDoc);
11358 ctxt->myDoc = NULL;
11359 }
11360 if (sax != NULL) ctxt->sax = NULL;
11361 xmlFreeParserCtxt(ctxt);
11362
11363 return(ret);
11364}
11365
Daniel Veillard4432df22003-09-28 18:58:27 +000011366
Owen Taylor3473f882001-02-23 17:55:21 +000011367/**
11368 * xmlParseDTD:
11369 * @ExternalID: a NAME* containing the External ID of the DTD
11370 * @SystemID: a NAME* containing the URL to the DTD
11371 *
11372 * Load and parse an external subset.
11373 *
11374 * Returns the resulting xmlDtdPtr or NULL in case of error.
11375 */
11376
11377xmlDtdPtr
11378xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11379 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11380}
Daniel Veillard4432df22003-09-28 18:58:27 +000011381#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011382
11383/************************************************************************
11384 * *
11385 * Front ends when parsing an Entity *
11386 * *
11387 ************************************************************************/
11388
11389/**
Owen Taylor3473f882001-02-23 17:55:21 +000011390 * xmlParseCtxtExternalEntity:
11391 * @ctx: the existing parsing context
11392 * @URL: the URL for the entity to load
11393 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011394 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011395 *
11396 * Parse an external general entity within an existing parsing context
11397 * An external general parsed entity is well-formed if it matches the
11398 * production labeled extParsedEnt.
11399 *
11400 * [78] extParsedEnt ::= TextDecl? content
11401 *
11402 * Returns 0 if the entity is well formed, -1 in case of args problem and
11403 * the parser error code otherwise
11404 */
11405
11406int
11407xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011408 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011409 xmlParserCtxtPtr ctxt;
11410 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011411 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011412 xmlSAXHandlerPtr oldsax = NULL;
11413 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011414 xmlChar start[4];
11415 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011416 xmlParserInputPtr inputStream;
11417 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011418
Daniel Veillardce682bc2004-11-05 17:22:25 +000011419 if (ctx == NULL) return(-1);
11420
Owen Taylor3473f882001-02-23 17:55:21 +000011421 if (ctx->depth > 40) {
11422 return(XML_ERR_ENTITY_LOOP);
11423 }
11424
Daniel Veillardcda96922001-08-21 10:56:31 +000011425 if (lst != NULL)
11426 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011427 if ((URL == NULL) && (ID == NULL))
11428 return(-1);
11429 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11430 return(-1);
11431
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011432 ctxt = xmlNewParserCtxt();
11433 if (ctxt == NULL) {
11434 return(-1);
11435 }
11436
Owen Taylor3473f882001-02-23 17:55:21 +000011437 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011438 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011439
11440 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11441 if (inputStream == NULL) {
11442 xmlFreeParserCtxt(ctxt);
11443 return(-1);
11444 }
11445
11446 inputPush(ctxt, inputStream);
11447
11448 if ((ctxt->directory == NULL) && (directory == NULL))
11449 directory = xmlParserGetDirectory((char *)URL);
11450 if ((ctxt->directory == NULL) && (directory != NULL))
11451 ctxt->directory = directory;
11452
Owen Taylor3473f882001-02-23 17:55:21 +000011453 oldsax = ctxt->sax;
11454 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011455 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011456 newDoc = xmlNewDoc(BAD_CAST "1.0");
11457 if (newDoc == NULL) {
11458 xmlFreeParserCtxt(ctxt);
11459 return(-1);
11460 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011461 if (ctx->myDoc->dict) {
11462 newDoc->dict = ctx->myDoc->dict;
11463 xmlDictReference(newDoc->dict);
11464 }
Owen Taylor3473f882001-02-23 17:55:21 +000011465 if (ctx->myDoc != NULL) {
11466 newDoc->intSubset = ctx->myDoc->intSubset;
11467 newDoc->extSubset = ctx->myDoc->extSubset;
11468 }
11469 if (ctx->myDoc->URL != NULL) {
11470 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11471 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011472 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11473 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011474 ctxt->sax = oldsax;
11475 xmlFreeParserCtxt(ctxt);
11476 newDoc->intSubset = NULL;
11477 newDoc->extSubset = NULL;
11478 xmlFreeDoc(newDoc);
11479 return(-1);
11480 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011481 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011482 nodePush(ctxt, newDoc->children);
11483 if (ctx->myDoc == NULL) {
11484 ctxt->myDoc = newDoc;
11485 } else {
11486 ctxt->myDoc = ctx->myDoc;
11487 newDoc->children->doc = ctx->myDoc;
11488 }
11489
Daniel Veillard87a764e2001-06-20 17:41:10 +000011490 /*
11491 * Get the 4 first bytes and decode the charset
11492 * if enc != XML_CHAR_ENCODING_NONE
11493 * plug some encoding conversion routines.
11494 */
11495 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011496 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11497 start[0] = RAW;
11498 start[1] = NXT(1);
11499 start[2] = NXT(2);
11500 start[3] = NXT(3);
11501 enc = xmlDetectCharEncoding(start, 4);
11502 if (enc != XML_CHAR_ENCODING_NONE) {
11503 xmlSwitchEncoding(ctxt, enc);
11504 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011505 }
11506
Owen Taylor3473f882001-02-23 17:55:21 +000011507 /*
11508 * Parse a possible text declaration first
11509 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011510 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011511 xmlParseTextDecl(ctxt);
11512 }
11513
11514 /*
11515 * Doing validity checking on chunk doesn't make sense
11516 */
11517 ctxt->instate = XML_PARSER_CONTENT;
11518 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011519 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011520 ctxt->loadsubset = ctx->loadsubset;
11521 ctxt->depth = ctx->depth + 1;
11522 ctxt->replaceEntities = ctx->replaceEntities;
11523 if (ctxt->validate) {
11524 ctxt->vctxt.error = ctx->vctxt.error;
11525 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011526 } else {
11527 ctxt->vctxt.error = NULL;
11528 ctxt->vctxt.warning = NULL;
11529 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011530 ctxt->vctxt.nodeTab = NULL;
11531 ctxt->vctxt.nodeNr = 0;
11532 ctxt->vctxt.nodeMax = 0;
11533 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011534 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11535 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011536 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11537 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11538 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011539 ctxt->dictNames = ctx->dictNames;
11540 ctxt->attsDefault = ctx->attsDefault;
11541 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011542 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011543
11544 xmlParseContent(ctxt);
11545
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011546 ctx->validate = ctxt->validate;
11547 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011548 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011549 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011550 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011551 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011552 }
11553 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011554 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011555 }
11556
11557 if (!ctxt->wellFormed) {
11558 if (ctxt->errNo == 0)
11559 ret = 1;
11560 else
11561 ret = ctxt->errNo;
11562 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011563 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011564 xmlNodePtr cur;
11565
11566 /*
11567 * Return the newly created nodeset after unlinking it from
11568 * they pseudo parent.
11569 */
11570 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011571 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011572 while (cur != NULL) {
11573 cur->parent = NULL;
11574 cur = cur->next;
11575 }
11576 newDoc->children->children = NULL;
11577 }
11578 ret = 0;
11579 }
11580 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011581 ctxt->dict = NULL;
11582 ctxt->attsDefault = NULL;
11583 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011584 xmlFreeParserCtxt(ctxt);
11585 newDoc->intSubset = NULL;
11586 newDoc->extSubset = NULL;
11587 xmlFreeDoc(newDoc);
11588
11589 return(ret);
11590}
11591
11592/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011593 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011594 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011595 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011596 * @sax: the SAX handler bloc (possibly NULL)
11597 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11598 * @depth: Used for loop detection, use 0
11599 * @URL: the URL for the entity to load
11600 * @ID: the System ID for the entity to load
11601 * @list: the return value for the set of parsed nodes
11602 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011603 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011604 *
11605 * Returns 0 if the entity is well formed, -1 in case of args problem and
11606 * the parser error code otherwise
11607 */
11608
Daniel Veillard7d515752003-09-26 19:12:37 +000011609static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011610xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11611 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011612 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011613 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011614 xmlParserCtxtPtr ctxt;
11615 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011616 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011617 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011618 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011619 xmlChar start[4];
11620 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011621
11622 if (depth > 40) {
11623 return(XML_ERR_ENTITY_LOOP);
11624 }
11625
11626
11627
11628 if (list != NULL)
11629 *list = NULL;
11630 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011631 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011632 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011633 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011634
11635
11636 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011637 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011638 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011639 if (oldctxt != NULL) {
11640 ctxt->_private = oldctxt->_private;
11641 ctxt->loadsubset = oldctxt->loadsubset;
11642 ctxt->validate = oldctxt->validate;
11643 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011644 ctxt->record_info = oldctxt->record_info;
11645 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11646 ctxt->node_seq.length = oldctxt->node_seq.length;
11647 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011648 } else {
11649 /*
11650 * Doing validity checking on chunk without context
11651 * doesn't make sense
11652 */
11653 ctxt->_private = NULL;
11654 ctxt->validate = 0;
11655 ctxt->external = 2;
11656 ctxt->loadsubset = 0;
11657 }
Owen Taylor3473f882001-02-23 17:55:21 +000011658 if (sax != NULL) {
11659 oldsax = ctxt->sax;
11660 ctxt->sax = sax;
11661 if (user_data != NULL)
11662 ctxt->userData = user_data;
11663 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011664 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011665 newDoc = xmlNewDoc(BAD_CAST "1.0");
11666 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011667 ctxt->node_seq.maximum = 0;
11668 ctxt->node_seq.length = 0;
11669 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011670 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011671 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011672 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011673 newDoc->intSubset = doc->intSubset;
11674 newDoc->extSubset = doc->extSubset;
11675 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011676 xmlDictReference(newDoc->dict);
11677
Owen Taylor3473f882001-02-23 17:55:21 +000011678 if (doc->URL != NULL) {
11679 newDoc->URL = xmlStrdup(doc->URL);
11680 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011681 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11682 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011683 if (sax != NULL)
11684 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011685 ctxt->node_seq.maximum = 0;
11686 ctxt->node_seq.length = 0;
11687 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011688 xmlFreeParserCtxt(ctxt);
11689 newDoc->intSubset = NULL;
11690 newDoc->extSubset = NULL;
11691 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011692 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011693 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011694 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011695 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011696 ctxt->myDoc = doc;
11697 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011698
Daniel Veillard87a764e2001-06-20 17:41:10 +000011699 /*
11700 * Get the 4 first bytes and decode the charset
11701 * if enc != XML_CHAR_ENCODING_NONE
11702 * plug some encoding conversion routines.
11703 */
11704 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011705 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11706 start[0] = RAW;
11707 start[1] = NXT(1);
11708 start[2] = NXT(2);
11709 start[3] = NXT(3);
11710 enc = xmlDetectCharEncoding(start, 4);
11711 if (enc != XML_CHAR_ENCODING_NONE) {
11712 xmlSwitchEncoding(ctxt, enc);
11713 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011714 }
11715
Owen Taylor3473f882001-02-23 17:55:21 +000011716 /*
11717 * Parse a possible text declaration first
11718 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011719 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011720 xmlParseTextDecl(ctxt);
11721 }
11722
Owen Taylor3473f882001-02-23 17:55:21 +000011723 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011724 ctxt->depth = depth;
11725
11726 xmlParseContent(ctxt);
11727
Daniel Veillard561b7f82002-03-20 21:55:57 +000011728 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011729 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011730 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011731 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011732 }
11733 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011734 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011735 }
11736
11737 if (!ctxt->wellFormed) {
11738 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011739 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011740 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011741 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011742 } else {
11743 if (list != NULL) {
11744 xmlNodePtr cur;
11745
11746 /*
11747 * Return the newly created nodeset after unlinking it from
11748 * they pseudo parent.
11749 */
11750 cur = newDoc->children->children;
11751 *list = cur;
11752 while (cur != NULL) {
11753 cur->parent = NULL;
11754 cur = cur->next;
11755 }
11756 newDoc->children->children = NULL;
11757 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011758 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011759 }
11760 if (sax != NULL)
11761 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011762 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11763 oldctxt->node_seq.length = ctxt->node_seq.length;
11764 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011765 ctxt->node_seq.maximum = 0;
11766 ctxt->node_seq.length = 0;
11767 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011768 xmlFreeParserCtxt(ctxt);
11769 newDoc->intSubset = NULL;
11770 newDoc->extSubset = NULL;
11771 xmlFreeDoc(newDoc);
11772
11773 return(ret);
11774}
11775
Daniel Veillard81273902003-09-30 00:43:48 +000011776#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011777/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011778 * xmlParseExternalEntity:
11779 * @doc: the document the chunk pertains to
11780 * @sax: the SAX handler bloc (possibly NULL)
11781 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11782 * @depth: Used for loop detection, use 0
11783 * @URL: the URL for the entity to load
11784 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011785 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011786 *
11787 * Parse an external general entity
11788 * An external general parsed entity is well-formed if it matches the
11789 * production labeled extParsedEnt.
11790 *
11791 * [78] extParsedEnt ::= TextDecl? content
11792 *
11793 * Returns 0 if the entity is well formed, -1 in case of args problem and
11794 * the parser error code otherwise
11795 */
11796
11797int
11798xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011799 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011800 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011801 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011802}
11803
11804/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011805 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011806 * @doc: the document the chunk pertains to
11807 * @sax: the SAX handler bloc (possibly NULL)
11808 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11809 * @depth: Used for loop detection, use 0
11810 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011811 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011812 *
11813 * Parse a well-balanced chunk of an XML document
11814 * called by the parser
11815 * The allowed sequence for the Well Balanced Chunk is the one defined by
11816 * the content production in the XML grammar:
11817 *
11818 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11819 *
11820 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11821 * the parser error code otherwise
11822 */
11823
11824int
11825xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011826 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011827 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11828 depth, string, lst, 0 );
11829}
Daniel Veillard81273902003-09-30 00:43:48 +000011830#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011831
11832/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011833 * xmlParseBalancedChunkMemoryInternal:
11834 * @oldctxt: the existing parsing context
11835 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11836 * @user_data: the user data field for the parser context
11837 * @lst: the return value for the set of parsed nodes
11838 *
11839 *
11840 * Parse a well-balanced chunk of an XML document
11841 * called by the parser
11842 * The allowed sequence for the Well Balanced Chunk is the one defined by
11843 * the content production in the XML grammar:
11844 *
11845 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11846 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011847 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11848 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011849 *
11850 * In case recover is set to 1, the nodelist will not be empty even if
11851 * the parsed chunk is not well balanced.
11852 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011853static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011854xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11855 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11856 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011857 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011858 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011859 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011860 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011861 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011862 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011863 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011864
11865 if (oldctxt->depth > 40) {
11866 return(XML_ERR_ENTITY_LOOP);
11867 }
11868
11869
11870 if (lst != NULL)
11871 *lst = NULL;
11872 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011873 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011874
11875 size = xmlStrlen(string);
11876
11877 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011878 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011879 if (user_data != NULL)
11880 ctxt->userData = user_data;
11881 else
11882 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011883 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11884 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011885 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11886 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11887 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011888
11889 oldsax = ctxt->sax;
11890 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011891 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011892 ctxt->replaceEntities = oldctxt->replaceEntities;
11893 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011894
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011895 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011896 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011897 newDoc = xmlNewDoc(BAD_CAST "1.0");
11898 if (newDoc == NULL) {
11899 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011900 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011901 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011902 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011903 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011904 newDoc->dict = ctxt->dict;
11905 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011906 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011907 } else {
11908 ctxt->myDoc = oldctxt->myDoc;
11909 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011910 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011911 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011912 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11913 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011914 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011915 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011916 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011917 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011918 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011919 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011920 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011921 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011922 ctxt->myDoc->children = NULL;
11923 ctxt->myDoc->last = NULL;
11924 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011925 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011926 ctxt->instate = XML_PARSER_CONTENT;
11927 ctxt->depth = oldctxt->depth + 1;
11928
Daniel Veillard328f48c2002-11-15 15:24:34 +000011929 ctxt->validate = 0;
11930 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011931 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11932 /*
11933 * ID/IDREF registration will be done in xmlValidateElement below
11934 */
11935 ctxt->loadsubset |= XML_SKIP_IDS;
11936 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011937 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011938 ctxt->attsDefault = oldctxt->attsDefault;
11939 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011940
Daniel Veillard68e9e742002-11-16 15:35:11 +000011941 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011942 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011943 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011944 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011945 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011946 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011947 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011948 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011949 }
11950
11951 if (!ctxt->wellFormed) {
11952 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011953 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011954 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011955 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011956 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011957 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011958 }
11959
William M. Brack7b9154b2003-09-27 19:23:50 +000011960 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011961 xmlNodePtr cur;
11962
11963 /*
11964 * Return the newly created nodeset after unlinking it from
11965 * they pseudo parent.
11966 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011967 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011968 *lst = cur;
11969 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011970#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011971 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11972 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11973 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011974 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11975 oldctxt->myDoc, cur);
11976 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011977#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011978 cur->parent = NULL;
11979 cur = cur->next;
11980 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011981 ctxt->myDoc->children->children = NULL;
11982 }
11983 if (ctxt->myDoc != NULL) {
11984 xmlFreeNode(ctxt->myDoc->children);
11985 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011986 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011987 }
11988
11989 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011990 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011991 ctxt->attsDefault = NULL;
11992 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011993 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011994 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011995 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011996 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011997
11998 return(ret);
11999}
12000
Daniel Veillard29b17482004-08-16 00:39:03 +000012001/**
12002 * xmlParseInNodeContext:
12003 * @node: the context node
12004 * @data: the input string
12005 * @datalen: the input string length in bytes
12006 * @options: a combination of xmlParserOption
12007 * @lst: the return value for the set of parsed nodes
12008 *
12009 * Parse a well-balanced chunk of an XML document
12010 * within the context (DTD, namespaces, etc ...) of the given node.
12011 *
12012 * The allowed sequence for the data is a Well Balanced Chunk defined by
12013 * the content production in the XML grammar:
12014 *
12015 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12016 *
12017 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12018 * error code otherwise
12019 */
12020xmlParserErrors
12021xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12022 int options, xmlNodePtr *lst) {
12023#ifdef SAX2
12024 xmlParserCtxtPtr ctxt;
12025 xmlDocPtr doc = NULL;
12026 xmlNodePtr fake, cur;
12027 int nsnr = 0;
12028
12029 xmlParserErrors ret = XML_ERR_OK;
12030
12031 /*
12032 * check all input parameters, grab the document
12033 */
12034 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12035 return(XML_ERR_INTERNAL_ERROR);
12036 switch (node->type) {
12037 case XML_ELEMENT_NODE:
12038 case XML_ATTRIBUTE_NODE:
12039 case XML_TEXT_NODE:
12040 case XML_CDATA_SECTION_NODE:
12041 case XML_ENTITY_REF_NODE:
12042 case XML_PI_NODE:
12043 case XML_COMMENT_NODE:
12044 case XML_DOCUMENT_NODE:
12045 case XML_HTML_DOCUMENT_NODE:
12046 break;
12047 default:
12048 return(XML_ERR_INTERNAL_ERROR);
12049
12050 }
12051 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12052 (node->type != XML_DOCUMENT_NODE) &&
12053 (node->type != XML_HTML_DOCUMENT_NODE))
12054 node = node->parent;
12055 if (node == NULL)
12056 return(XML_ERR_INTERNAL_ERROR);
12057 if (node->type == XML_ELEMENT_NODE)
12058 doc = node->doc;
12059 else
12060 doc = (xmlDocPtr) node;
12061 if (doc == NULL)
12062 return(XML_ERR_INTERNAL_ERROR);
12063
12064 /*
12065 * allocate a context and set-up everything not related to the
12066 * node position in the tree
12067 */
12068 if (doc->type == XML_DOCUMENT_NODE)
12069 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12070#ifdef LIBXML_HTML_ENABLED
12071 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12072 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12073#endif
12074 else
12075 return(XML_ERR_INTERNAL_ERROR);
12076
12077 if (ctxt == NULL)
12078 return(XML_ERR_NO_MEMORY);
12079 fake = xmlNewComment(NULL);
12080 if (fake == NULL) {
12081 xmlFreeParserCtxt(ctxt);
12082 return(XML_ERR_NO_MEMORY);
12083 }
12084 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012085
12086 /*
12087 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12088 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12089 * we must wait until the last moment to free the original one.
12090 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012091 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012092 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012093 xmlDictFree(ctxt->dict);
12094 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012095 } else
12096 options |= XML_PARSE_NODICT;
12097
12098 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012099 xmlDetectSAX2(ctxt);
12100 ctxt->myDoc = doc;
12101
12102 if (node->type == XML_ELEMENT_NODE) {
12103 nodePush(ctxt, node);
12104 /*
12105 * initialize the SAX2 namespaces stack
12106 */
12107 cur = node;
12108 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12109 xmlNsPtr ns = cur->nsDef;
12110 const xmlChar *iprefix, *ihref;
12111
12112 while (ns != NULL) {
12113 if (ctxt->dict) {
12114 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12115 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12116 } else {
12117 iprefix = ns->prefix;
12118 ihref = ns->href;
12119 }
12120
12121 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12122 nsPush(ctxt, iprefix, ihref);
12123 nsnr++;
12124 }
12125 ns = ns->next;
12126 }
12127 cur = cur->parent;
12128 }
12129 ctxt->instate = XML_PARSER_CONTENT;
12130 }
12131
12132 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12133 /*
12134 * ID/IDREF registration will be done in xmlValidateElement below
12135 */
12136 ctxt->loadsubset |= XML_SKIP_IDS;
12137 }
12138
Daniel Veillard499cc922006-01-18 17:22:35 +000012139#ifdef LIBXML_HTML_ENABLED
12140 if (doc->type == XML_HTML_DOCUMENT_NODE)
12141 __htmlParseContent(ctxt);
12142 else
12143#endif
12144 xmlParseContent(ctxt);
12145
Daniel Veillard29b17482004-08-16 00:39:03 +000012146 nsPop(ctxt, nsnr);
12147 if ((RAW == '<') && (NXT(1) == '/')) {
12148 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12149 } else if (RAW != 0) {
12150 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12151 }
12152 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12153 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12154 ctxt->wellFormed = 0;
12155 }
12156
12157 if (!ctxt->wellFormed) {
12158 if (ctxt->errNo == 0)
12159 ret = XML_ERR_INTERNAL_ERROR;
12160 else
12161 ret = (xmlParserErrors)ctxt->errNo;
12162 } else {
12163 ret = XML_ERR_OK;
12164 }
12165
12166 /*
12167 * Return the newly created nodeset after unlinking it from
12168 * the pseudo sibling.
12169 */
12170
12171 cur = fake->next;
12172 fake->next = NULL;
12173 node->last = fake;
12174
12175 if (cur != NULL) {
12176 cur->prev = NULL;
12177 }
12178
12179 *lst = cur;
12180
12181 while (cur != NULL) {
12182 cur->parent = NULL;
12183 cur = cur->next;
12184 }
12185
12186 xmlUnlinkNode(fake);
12187 xmlFreeNode(fake);
12188
12189
12190 if (ret != XML_ERR_OK) {
12191 xmlFreeNodeList(*lst);
12192 *lst = NULL;
12193 }
William M. Brackc3f81342004-10-03 01:22:44 +000012194
William M. Brackb7b54de2004-10-06 16:38:01 +000012195 if (doc->dict != NULL)
12196 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012197 xmlFreeParserCtxt(ctxt);
12198
12199 return(ret);
12200#else /* !SAX2 */
12201 return(XML_ERR_INTERNAL_ERROR);
12202#endif
12203}
12204
Daniel Veillard81273902003-09-30 00:43:48 +000012205#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012206/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012207 * xmlParseBalancedChunkMemoryRecover:
12208 * @doc: the document the chunk pertains to
12209 * @sax: the SAX handler bloc (possibly NULL)
12210 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12211 * @depth: Used for loop detection, use 0
12212 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12213 * @lst: the return value for the set of parsed nodes
12214 * @recover: return nodes even if the data is broken (use 0)
12215 *
12216 *
12217 * Parse a well-balanced chunk of an XML document
12218 * called by the parser
12219 * The allowed sequence for the Well Balanced Chunk is the one defined by
12220 * the content production in the XML grammar:
12221 *
12222 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12223 *
12224 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12225 * the parser error code otherwise
12226 *
12227 * In case recover is set to 1, the nodelist will not be empty even if
12228 * the parsed chunk is not well balanced.
12229 */
12230int
12231xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12232 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12233 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012234 xmlParserCtxtPtr ctxt;
12235 xmlDocPtr newDoc;
12236 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012237 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012238 int size;
12239 int ret = 0;
12240
12241 if (depth > 40) {
12242 return(XML_ERR_ENTITY_LOOP);
12243 }
12244
12245
Daniel Veillardcda96922001-08-21 10:56:31 +000012246 if (lst != NULL)
12247 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012248 if (string == NULL)
12249 return(-1);
12250
12251 size = xmlStrlen(string);
12252
12253 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12254 if (ctxt == NULL) return(-1);
12255 ctxt->userData = ctxt;
12256 if (sax != NULL) {
12257 oldsax = ctxt->sax;
12258 ctxt->sax = sax;
12259 if (user_data != NULL)
12260 ctxt->userData = user_data;
12261 }
12262 newDoc = xmlNewDoc(BAD_CAST "1.0");
12263 if (newDoc == NULL) {
12264 xmlFreeParserCtxt(ctxt);
12265 return(-1);
12266 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012267 if ((doc != NULL) && (doc->dict != NULL)) {
12268 xmlDictFree(ctxt->dict);
12269 ctxt->dict = doc->dict;
12270 xmlDictReference(ctxt->dict);
12271 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12272 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12273 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12274 ctxt->dictNames = 1;
12275 } else {
12276 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12277 }
Owen Taylor3473f882001-02-23 17:55:21 +000012278 if (doc != NULL) {
12279 newDoc->intSubset = doc->intSubset;
12280 newDoc->extSubset = doc->extSubset;
12281 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012282 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12283 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012284 if (sax != NULL)
12285 ctxt->sax = oldsax;
12286 xmlFreeParserCtxt(ctxt);
12287 newDoc->intSubset = NULL;
12288 newDoc->extSubset = NULL;
12289 xmlFreeDoc(newDoc);
12290 return(-1);
12291 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012292 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12293 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012294 if (doc == NULL) {
12295 ctxt->myDoc = newDoc;
12296 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012297 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012298 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012299 /* Ensure that doc has XML spec namespace */
12300 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12301 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012302 }
12303 ctxt->instate = XML_PARSER_CONTENT;
12304 ctxt->depth = depth;
12305
12306 /*
12307 * Doing validity checking on chunk doesn't make sense
12308 */
12309 ctxt->validate = 0;
12310 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012311 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012312
Daniel Veillardb39bc392002-10-26 19:29:51 +000012313 if ( doc != NULL ){
12314 content = doc->children;
12315 doc->children = NULL;
12316 xmlParseContent(ctxt);
12317 doc->children = content;
12318 }
12319 else {
12320 xmlParseContent(ctxt);
12321 }
Owen Taylor3473f882001-02-23 17:55:21 +000012322 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012323 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012324 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012325 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012326 }
12327 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012328 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012329 }
12330
12331 if (!ctxt->wellFormed) {
12332 if (ctxt->errNo == 0)
12333 ret = 1;
12334 else
12335 ret = ctxt->errNo;
12336 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012337 ret = 0;
12338 }
12339
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012340 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12341 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012342
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012343 /*
12344 * Return the newly created nodeset after unlinking it from
12345 * they pseudo parent.
12346 */
12347 cur = newDoc->children->children;
12348 *lst = cur;
12349 while (cur != NULL) {
12350 xmlSetTreeDoc(cur, doc);
12351 cur->parent = NULL;
12352 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012353 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012354 newDoc->children->children = NULL;
12355 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012356
Owen Taylor3473f882001-02-23 17:55:21 +000012357 if (sax != NULL)
12358 ctxt->sax = oldsax;
12359 xmlFreeParserCtxt(ctxt);
12360 newDoc->intSubset = NULL;
12361 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012362 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012363 xmlFreeDoc(newDoc);
12364
12365 return(ret);
12366}
12367
12368/**
12369 * xmlSAXParseEntity:
12370 * @sax: the SAX handler block
12371 * @filename: the filename
12372 *
12373 * parse an XML external entity out of context and build a tree.
12374 * It use the given SAX function block to handle the parsing callback.
12375 * If sax is NULL, fallback to the default DOM tree building routines.
12376 *
12377 * [78] extParsedEnt ::= TextDecl? content
12378 *
12379 * This correspond to a "Well Balanced" chunk
12380 *
12381 * Returns the resulting document tree
12382 */
12383
12384xmlDocPtr
12385xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12386 xmlDocPtr ret;
12387 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012388
12389 ctxt = xmlCreateFileParserCtxt(filename);
12390 if (ctxt == NULL) {
12391 return(NULL);
12392 }
12393 if (sax != NULL) {
12394 if (ctxt->sax != NULL)
12395 xmlFree(ctxt->sax);
12396 ctxt->sax = sax;
12397 ctxt->userData = NULL;
12398 }
12399
Owen Taylor3473f882001-02-23 17:55:21 +000012400 xmlParseExtParsedEnt(ctxt);
12401
12402 if (ctxt->wellFormed)
12403 ret = ctxt->myDoc;
12404 else {
12405 ret = NULL;
12406 xmlFreeDoc(ctxt->myDoc);
12407 ctxt->myDoc = NULL;
12408 }
12409 if (sax != NULL)
12410 ctxt->sax = NULL;
12411 xmlFreeParserCtxt(ctxt);
12412
12413 return(ret);
12414}
12415
12416/**
12417 * xmlParseEntity:
12418 * @filename: the filename
12419 *
12420 * parse an XML external entity out of context and build a tree.
12421 *
12422 * [78] extParsedEnt ::= TextDecl? content
12423 *
12424 * This correspond to a "Well Balanced" chunk
12425 *
12426 * Returns the resulting document tree
12427 */
12428
12429xmlDocPtr
12430xmlParseEntity(const char *filename) {
12431 return(xmlSAXParseEntity(NULL, filename));
12432}
Daniel Veillard81273902003-09-30 00:43:48 +000012433#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012434
12435/**
12436 * xmlCreateEntityParserCtxt:
12437 * @URL: the entity URL
12438 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012439 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012440 *
12441 * Create a parser context for an external entity
12442 * Automatic support for ZLIB/Compress compressed document is provided
12443 * by default if found at compile-time.
12444 *
12445 * Returns the new parser context or NULL
12446 */
12447xmlParserCtxtPtr
12448xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12449 const xmlChar *base) {
12450 xmlParserCtxtPtr ctxt;
12451 xmlParserInputPtr inputStream;
12452 char *directory = NULL;
12453 xmlChar *uri;
12454
12455 ctxt = xmlNewParserCtxt();
12456 if (ctxt == NULL) {
12457 return(NULL);
12458 }
12459
12460 uri = xmlBuildURI(URL, base);
12461
12462 if (uri == NULL) {
12463 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12464 if (inputStream == NULL) {
12465 xmlFreeParserCtxt(ctxt);
12466 return(NULL);
12467 }
12468
12469 inputPush(ctxt, inputStream);
12470
12471 if ((ctxt->directory == NULL) && (directory == NULL))
12472 directory = xmlParserGetDirectory((char *)URL);
12473 if ((ctxt->directory == NULL) && (directory != NULL))
12474 ctxt->directory = directory;
12475 } else {
12476 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12477 if (inputStream == NULL) {
12478 xmlFree(uri);
12479 xmlFreeParserCtxt(ctxt);
12480 return(NULL);
12481 }
12482
12483 inputPush(ctxt, inputStream);
12484
12485 if ((ctxt->directory == NULL) && (directory == NULL))
12486 directory = xmlParserGetDirectory((char *)uri);
12487 if ((ctxt->directory == NULL) && (directory != NULL))
12488 ctxt->directory = directory;
12489 xmlFree(uri);
12490 }
Owen Taylor3473f882001-02-23 17:55:21 +000012491 return(ctxt);
12492}
12493
12494/************************************************************************
12495 * *
12496 * Front ends when parsing from a file *
12497 * *
12498 ************************************************************************/
12499
12500/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012501 * xmlCreateURLParserCtxt:
12502 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012503 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012504 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012505 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012506 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012507 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012508 *
12509 * Returns the new parser context or NULL
12510 */
12511xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012512xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012513{
12514 xmlParserCtxtPtr ctxt;
12515 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012516 char *directory = NULL;
12517
Owen Taylor3473f882001-02-23 17:55:21 +000012518 ctxt = xmlNewParserCtxt();
12519 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012520 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012521 return(NULL);
12522 }
12523
Daniel Veillarddf292f72005-01-16 19:00:15 +000012524 if (options)
12525 xmlCtxtUseOptions(ctxt, options);
12526 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012527
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012528 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012529 if (inputStream == NULL) {
12530 xmlFreeParserCtxt(ctxt);
12531 return(NULL);
12532 }
12533
Owen Taylor3473f882001-02-23 17:55:21 +000012534 inputPush(ctxt, inputStream);
12535 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012536 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012537 if ((ctxt->directory == NULL) && (directory != NULL))
12538 ctxt->directory = directory;
12539
12540 return(ctxt);
12541}
12542
Daniel Veillard61b93382003-11-03 14:28:31 +000012543/**
12544 * xmlCreateFileParserCtxt:
12545 * @filename: the filename
12546 *
12547 * Create a parser context for a file content.
12548 * Automatic support for ZLIB/Compress compressed document is provided
12549 * by default if found at compile-time.
12550 *
12551 * Returns the new parser context or NULL
12552 */
12553xmlParserCtxtPtr
12554xmlCreateFileParserCtxt(const char *filename)
12555{
12556 return(xmlCreateURLParserCtxt(filename, 0));
12557}
12558
Daniel Veillard81273902003-09-30 00:43:48 +000012559#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012560/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012561 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012562 * @sax: the SAX handler block
12563 * @filename: the filename
12564 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12565 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012566 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012567 *
12568 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12569 * compressed document is provided by default if found at compile-time.
12570 * It use the given SAX function block to handle the parsing callback.
12571 * If sax is NULL, fallback to the default DOM tree building routines.
12572 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012573 * User data (void *) is stored within the parser context in the
12574 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012575 *
Owen Taylor3473f882001-02-23 17:55:21 +000012576 * Returns the resulting document tree
12577 */
12578
12579xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012580xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12581 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012582 xmlDocPtr ret;
12583 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012584
Daniel Veillard635ef722001-10-29 11:48:19 +000012585 xmlInitParser();
12586
Owen Taylor3473f882001-02-23 17:55:21 +000012587 ctxt = xmlCreateFileParserCtxt(filename);
12588 if (ctxt == NULL) {
12589 return(NULL);
12590 }
12591 if (sax != NULL) {
12592 if (ctxt->sax != NULL)
12593 xmlFree(ctxt->sax);
12594 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012595 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012596 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012597 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012598 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012599 }
Owen Taylor3473f882001-02-23 17:55:21 +000012600
Daniel Veillard37d2d162008-03-14 10:54:00 +000012601 if (ctxt->directory == NULL)
12602 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012603
Daniel Veillarddad3f682002-11-17 16:47:27 +000012604 ctxt->recovery = recovery;
12605
Owen Taylor3473f882001-02-23 17:55:21 +000012606 xmlParseDocument(ctxt);
12607
William M. Brackc07329e2003-09-08 01:57:30 +000012608 if ((ctxt->wellFormed) || recovery) {
12609 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012610 if (ret != NULL) {
12611 if (ctxt->input->buf->compressed > 0)
12612 ret->compression = 9;
12613 else
12614 ret->compression = ctxt->input->buf->compressed;
12615 }
William M. Brackc07329e2003-09-08 01:57:30 +000012616 }
Owen Taylor3473f882001-02-23 17:55:21 +000012617 else {
12618 ret = NULL;
12619 xmlFreeDoc(ctxt->myDoc);
12620 ctxt->myDoc = NULL;
12621 }
12622 if (sax != NULL)
12623 ctxt->sax = NULL;
12624 xmlFreeParserCtxt(ctxt);
12625
12626 return(ret);
12627}
12628
12629/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012630 * xmlSAXParseFile:
12631 * @sax: the SAX handler block
12632 * @filename: the filename
12633 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12634 * documents
12635 *
12636 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12637 * compressed document is provided by default if found at compile-time.
12638 * It use the given SAX function block to handle the parsing callback.
12639 * If sax is NULL, fallback to the default DOM tree building routines.
12640 *
12641 * Returns the resulting document tree
12642 */
12643
12644xmlDocPtr
12645xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12646 int recovery) {
12647 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12648}
12649
12650/**
Owen Taylor3473f882001-02-23 17:55:21 +000012651 * xmlRecoverDoc:
12652 * @cur: a pointer to an array of xmlChar
12653 *
12654 * parse an XML in-memory document and build a tree.
12655 * In the case the document is not Well Formed, a tree is built anyway
12656 *
12657 * Returns the resulting document tree
12658 */
12659
12660xmlDocPtr
12661xmlRecoverDoc(xmlChar *cur) {
12662 return(xmlSAXParseDoc(NULL, cur, 1));
12663}
12664
12665/**
12666 * xmlParseFile:
12667 * @filename: the filename
12668 *
12669 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12670 * compressed document is provided by default if found at compile-time.
12671 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012672 * Returns the resulting document tree if the file was wellformed,
12673 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012674 */
12675
12676xmlDocPtr
12677xmlParseFile(const char *filename) {
12678 return(xmlSAXParseFile(NULL, filename, 0));
12679}
12680
12681/**
12682 * xmlRecoverFile:
12683 * @filename: the filename
12684 *
12685 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12686 * compressed document is provided by default if found at compile-time.
12687 * In the case the document is not Well Formed, a tree is built anyway
12688 *
12689 * Returns the resulting document tree
12690 */
12691
12692xmlDocPtr
12693xmlRecoverFile(const char *filename) {
12694 return(xmlSAXParseFile(NULL, filename, 1));
12695}
12696
12697
12698/**
12699 * xmlSetupParserForBuffer:
12700 * @ctxt: an XML parser context
12701 * @buffer: a xmlChar * buffer
12702 * @filename: a file name
12703 *
12704 * Setup the parser context to parse a new buffer; Clears any prior
12705 * contents from the parser context. The buffer parameter must not be
12706 * NULL, but the filename parameter can be
12707 */
12708void
12709xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12710 const char* filename)
12711{
12712 xmlParserInputPtr input;
12713
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012714 if ((ctxt == NULL) || (buffer == NULL))
12715 return;
12716
Owen Taylor3473f882001-02-23 17:55:21 +000012717 input = xmlNewInputStream(ctxt);
12718 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012719 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012720 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012721 return;
12722 }
12723
12724 xmlClearParserCtxt(ctxt);
12725 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012726 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012727 input->base = buffer;
12728 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012729 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012730 inputPush(ctxt, input);
12731}
12732
12733/**
12734 * xmlSAXUserParseFile:
12735 * @sax: a SAX handler
12736 * @user_data: The user data returned on SAX callbacks
12737 * @filename: a file name
12738 *
12739 * parse an XML file and call the given SAX handler routines.
12740 * Automatic support for ZLIB/Compress compressed document is provided
12741 *
12742 * Returns 0 in case of success or a error number otherwise
12743 */
12744int
12745xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12746 const char *filename) {
12747 int ret = 0;
12748 xmlParserCtxtPtr ctxt;
12749
12750 ctxt = xmlCreateFileParserCtxt(filename);
12751 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012752 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012753 xmlFree(ctxt->sax);
12754 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012755 xmlDetectSAX2(ctxt);
12756
Owen Taylor3473f882001-02-23 17:55:21 +000012757 if (user_data != NULL)
12758 ctxt->userData = user_data;
12759
12760 xmlParseDocument(ctxt);
12761
12762 if (ctxt->wellFormed)
12763 ret = 0;
12764 else {
12765 if (ctxt->errNo != 0)
12766 ret = ctxt->errNo;
12767 else
12768 ret = -1;
12769 }
12770 if (sax != NULL)
12771 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012772 if (ctxt->myDoc != NULL) {
12773 xmlFreeDoc(ctxt->myDoc);
12774 ctxt->myDoc = NULL;
12775 }
Owen Taylor3473f882001-02-23 17:55:21 +000012776 xmlFreeParserCtxt(ctxt);
12777
12778 return ret;
12779}
Daniel Veillard81273902003-09-30 00:43:48 +000012780#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012781
12782/************************************************************************
12783 * *
12784 * Front ends when parsing from memory *
12785 * *
12786 ************************************************************************/
12787
12788/**
12789 * xmlCreateMemoryParserCtxt:
12790 * @buffer: a pointer to a char array
12791 * @size: the size of the array
12792 *
12793 * Create a parser context for an XML in-memory document.
12794 *
12795 * Returns the new parser context or NULL
12796 */
12797xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012798xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012799 xmlParserCtxtPtr ctxt;
12800 xmlParserInputPtr input;
12801 xmlParserInputBufferPtr buf;
12802
12803 if (buffer == NULL)
12804 return(NULL);
12805 if (size <= 0)
12806 return(NULL);
12807
12808 ctxt = xmlNewParserCtxt();
12809 if (ctxt == NULL)
12810 return(NULL);
12811
Daniel Veillard53350552003-09-18 13:35:51 +000012812 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012813 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012814 if (buf == NULL) {
12815 xmlFreeParserCtxt(ctxt);
12816 return(NULL);
12817 }
Owen Taylor3473f882001-02-23 17:55:21 +000012818
12819 input = xmlNewInputStream(ctxt);
12820 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012821 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012822 xmlFreeParserCtxt(ctxt);
12823 return(NULL);
12824 }
12825
12826 input->filename = NULL;
12827 input->buf = buf;
12828 input->base = input->buf->buffer->content;
12829 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012830 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012831
12832 inputPush(ctxt, input);
12833 return(ctxt);
12834}
12835
Daniel Veillard81273902003-09-30 00:43:48 +000012836#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012837/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012838 * xmlSAXParseMemoryWithData:
12839 * @sax: the SAX handler block
12840 * @buffer: an pointer to a char array
12841 * @size: the size of the array
12842 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12843 * documents
12844 * @data: the userdata
12845 *
12846 * parse an XML in-memory block and use the given SAX function block
12847 * to handle the parsing callback. If sax is NULL, fallback to the default
12848 * DOM tree building routines.
12849 *
12850 * User data (void *) is stored within the parser context in the
12851 * context's _private member, so it is available nearly everywhere in libxml
12852 *
12853 * Returns the resulting document tree
12854 */
12855
12856xmlDocPtr
12857xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12858 int size, int recovery, void *data) {
12859 xmlDocPtr ret;
12860 xmlParserCtxtPtr ctxt;
12861
12862 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12863 if (ctxt == NULL) return(NULL);
12864 if (sax != NULL) {
12865 if (ctxt->sax != NULL)
12866 xmlFree(ctxt->sax);
12867 ctxt->sax = sax;
12868 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012869 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012870 if (data!=NULL) {
12871 ctxt->_private=data;
12872 }
12873
Daniel Veillardadba5f12003-04-04 16:09:01 +000012874 ctxt->recovery = recovery;
12875
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012876 xmlParseDocument(ctxt);
12877
12878 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12879 else {
12880 ret = NULL;
12881 xmlFreeDoc(ctxt->myDoc);
12882 ctxt->myDoc = NULL;
12883 }
12884 if (sax != NULL)
12885 ctxt->sax = NULL;
12886 xmlFreeParserCtxt(ctxt);
12887
12888 return(ret);
12889}
12890
12891/**
Owen Taylor3473f882001-02-23 17:55:21 +000012892 * xmlSAXParseMemory:
12893 * @sax: the SAX handler block
12894 * @buffer: an pointer to a char array
12895 * @size: the size of the array
12896 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12897 * documents
12898 *
12899 * parse an XML in-memory block and use the given SAX function block
12900 * to handle the parsing callback. If sax is NULL, fallback to the default
12901 * DOM tree building routines.
12902 *
12903 * Returns the resulting document tree
12904 */
12905xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012906xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12907 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012908 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012909}
12910
12911/**
12912 * xmlParseMemory:
12913 * @buffer: an pointer to a char array
12914 * @size: the size of the array
12915 *
12916 * parse an XML in-memory block and build a tree.
12917 *
12918 * Returns the resulting document tree
12919 */
12920
Daniel Veillard50822cb2001-07-26 20:05:51 +000012921xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012922 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12923}
12924
12925/**
12926 * xmlRecoverMemory:
12927 * @buffer: an pointer to a char array
12928 * @size: the size of the array
12929 *
12930 * parse an XML in-memory block and build a tree.
12931 * In the case the document is not Well Formed, a tree is built anyway
12932 *
12933 * Returns the resulting document tree
12934 */
12935
Daniel Veillard50822cb2001-07-26 20:05:51 +000012936xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012937 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12938}
12939
12940/**
12941 * xmlSAXUserParseMemory:
12942 * @sax: a SAX handler
12943 * @user_data: The user data returned on SAX callbacks
12944 * @buffer: an in-memory XML document input
12945 * @size: the length of the XML document in bytes
12946 *
12947 * A better SAX parsing routine.
12948 * parse an XML in-memory buffer and call the given SAX handler routines.
12949 *
12950 * Returns 0 in case of success or a error number otherwise
12951 */
12952int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012953 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012954 int ret = 0;
12955 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012956
12957 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12958 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012959 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12960 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000012961 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012962 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012963
Daniel Veillard30211a02001-04-26 09:33:18 +000012964 if (user_data != NULL)
12965 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012966
12967 xmlParseDocument(ctxt);
12968
12969 if (ctxt->wellFormed)
12970 ret = 0;
12971 else {
12972 if (ctxt->errNo != 0)
12973 ret = ctxt->errNo;
12974 else
12975 ret = -1;
12976 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012977 if (sax != NULL)
12978 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012979 if (ctxt->myDoc != NULL) {
12980 xmlFreeDoc(ctxt->myDoc);
12981 ctxt->myDoc = NULL;
12982 }
Owen Taylor3473f882001-02-23 17:55:21 +000012983 xmlFreeParserCtxt(ctxt);
12984
12985 return ret;
12986}
Daniel Veillard81273902003-09-30 00:43:48 +000012987#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012988
12989/**
12990 * xmlCreateDocParserCtxt:
12991 * @cur: a pointer to an array of xmlChar
12992 *
12993 * Creates a parser context for an XML in-memory document.
12994 *
12995 * Returns the new parser context or NULL
12996 */
12997xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012998xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012999 int len;
13000
13001 if (cur == NULL)
13002 return(NULL);
13003 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013004 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013005}
13006
Daniel Veillard81273902003-09-30 00:43:48 +000013007#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013008/**
13009 * xmlSAXParseDoc:
13010 * @sax: the SAX handler block
13011 * @cur: a pointer to an array of xmlChar
13012 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13013 * documents
13014 *
13015 * parse an XML in-memory document and build a tree.
13016 * It use the given SAX function block to handle the parsing callback.
13017 * If sax is NULL, fallback to the default DOM tree building routines.
13018 *
13019 * Returns the resulting document tree
13020 */
13021
13022xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013023xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013024 xmlDocPtr ret;
13025 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013026 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013027
Daniel Veillard38936062004-11-04 17:45:11 +000013028 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013029
13030
13031 ctxt = xmlCreateDocParserCtxt(cur);
13032 if (ctxt == NULL) return(NULL);
13033 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013034 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013035 ctxt->sax = sax;
13036 ctxt->userData = NULL;
13037 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013038 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013039
13040 xmlParseDocument(ctxt);
13041 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13042 else {
13043 ret = NULL;
13044 xmlFreeDoc(ctxt->myDoc);
13045 ctxt->myDoc = NULL;
13046 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013047 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013048 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013049 xmlFreeParserCtxt(ctxt);
13050
13051 return(ret);
13052}
13053
13054/**
13055 * xmlParseDoc:
13056 * @cur: a pointer to an array of xmlChar
13057 *
13058 * parse an XML in-memory document and build a tree.
13059 *
13060 * Returns the resulting document tree
13061 */
13062
13063xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013064xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013065 return(xmlSAXParseDoc(NULL, cur, 0));
13066}
Daniel Veillard81273902003-09-30 00:43:48 +000013067#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013068
Daniel Veillard81273902003-09-30 00:43:48 +000013069#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013070/************************************************************************
13071 * *
13072 * Specific function to keep track of entities references *
13073 * and used by the XSLT debugger *
13074 * *
13075 ************************************************************************/
13076
13077static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13078
13079/**
13080 * xmlAddEntityReference:
13081 * @ent : A valid entity
13082 * @firstNode : A valid first node for children of entity
13083 * @lastNode : A valid last node of children entity
13084 *
13085 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13086 */
13087static void
13088xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13089 xmlNodePtr lastNode)
13090{
13091 if (xmlEntityRefFunc != NULL) {
13092 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13093 }
13094}
13095
13096
13097/**
13098 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013099 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013100 *
13101 * Set the function to call call back when a xml reference has been made
13102 */
13103void
13104xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13105{
13106 xmlEntityRefFunc = func;
13107}
Daniel Veillard81273902003-09-30 00:43:48 +000013108#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013109
13110/************************************************************************
13111 * *
13112 * Miscellaneous *
13113 * *
13114 ************************************************************************/
13115
13116#ifdef LIBXML_XPATH_ENABLED
13117#include <libxml/xpath.h>
13118#endif
13119
Daniel Veillardffa3c742005-07-21 13:24:09 +000013120extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013121static int xmlParserInitialized = 0;
13122
13123/**
13124 * xmlInitParser:
13125 *
13126 * Initialization function for the XML parser.
13127 * This is not reentrant. Call once before processing in case of
13128 * use in multithreaded programs.
13129 */
13130
13131void
13132xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013133 if (xmlParserInitialized != 0)
13134 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013135
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013136#ifdef LIBXML_THREAD_ENABLED
13137 __xmlGlobalInitMutexLock();
13138 if (xmlParserInitialized == 0) {
13139#endif
13140 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13141 (xmlGenericError == NULL))
13142 initGenericErrorDefaultFunc(NULL);
13143 xmlInitGlobals();
13144 xmlInitThreads();
13145 xmlInitMemory();
13146 xmlInitCharEncodingHandlers();
13147 xmlDefaultSAXHandlerInit();
13148 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013149#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013150 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013151#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013152#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013153 htmlInitAutoClose();
13154 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013155#endif
13156#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013157 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013158#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013159 xmlParserInitialized = 1;
13160#ifdef LIBXML_THREAD_ENABLED
13161 }
13162 __xmlGlobalInitMutexUnlock();
13163#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013164}
13165
13166/**
13167 * xmlCleanupParser:
13168 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000013169 * Cleanup function for the XML library. It tries to reclaim all
13170 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000013171 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000013172 * function should not prevent reusing the library but one should
13173 * call xmlCleanupParser() only when the process has
Daniel Veillardccc476f2008-03-04 13:19:49 +000013174 * finished using the library and all XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000013175 */
13176
13177void
13178xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013179 if (!xmlParserInitialized)
13180 return;
13181
Owen Taylor3473f882001-02-23 17:55:21 +000013182 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013183#ifdef LIBXML_CATALOG_ENABLED
13184 xmlCatalogCleanup();
13185#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013186 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013187 xmlCleanupInputCallbacks();
13188#ifdef LIBXML_OUTPUT_ENABLED
13189 xmlCleanupOutputCallbacks();
13190#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013191#ifdef LIBXML_SCHEMAS_ENABLED
13192 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013193 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013194#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013195 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013196 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013197 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013198 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013199 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013200}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013201
13202/************************************************************************
13203 * *
13204 * New set (2.6.0) of simpler and more flexible APIs *
13205 * *
13206 ************************************************************************/
13207
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL @str is ignored and a NULL "dict" means the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that "DICT_FREE(x);"
 * is exactly one statement and stays correct as the body of an if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13219
13220/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013221 * xmlCtxtReset:
13222 * @ctxt: an XML parser context
13223 *
13224 * Reset a parser context
13225 */
13226void
13227xmlCtxtReset(xmlParserCtxtPtr ctxt)
13228{
13229 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013230 xmlDictPtr dict;
13231
13232 if (ctxt == NULL)
13233 return;
13234
13235 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013236
13237 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13238 xmlFreeInputStream(input);
13239 }
13240 ctxt->inputNr = 0;
13241 ctxt->input = NULL;
13242
13243 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013244 if (ctxt->spaceTab != NULL) {
13245 ctxt->spaceTab[0] = -1;
13246 ctxt->space = &ctxt->spaceTab[0];
13247 } else {
13248 ctxt->space = NULL;
13249 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013250
13251
13252 ctxt->nodeNr = 0;
13253 ctxt->node = NULL;
13254
13255 ctxt->nameNr = 0;
13256 ctxt->name = NULL;
13257
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013258 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013259 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013260 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013261 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013262 DICT_FREE(ctxt->directory);
13263 ctxt->directory = NULL;
13264 DICT_FREE(ctxt->extSubURI);
13265 ctxt->extSubURI = NULL;
13266 DICT_FREE(ctxt->extSubSystem);
13267 ctxt->extSubSystem = NULL;
13268 if (ctxt->myDoc != NULL)
13269 xmlFreeDoc(ctxt->myDoc);
13270 ctxt->myDoc = NULL;
13271
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013272 ctxt->standalone = -1;
13273 ctxt->hasExternalSubset = 0;
13274 ctxt->hasPErefs = 0;
13275 ctxt->html = 0;
13276 ctxt->external = 0;
13277 ctxt->instate = XML_PARSER_START;
13278 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013279
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280 ctxt->wellFormed = 1;
13281 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013282 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013283 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013284#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013285 ctxt->vctxt.userData = ctxt;
13286 ctxt->vctxt.error = xmlParserValidityError;
13287 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013288#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013289 ctxt->record_info = 0;
13290 ctxt->nbChars = 0;
13291 ctxt->checkIndex = 0;
13292 ctxt->inSubset = 0;
13293 ctxt->errNo = XML_ERR_OK;
13294 ctxt->depth = 0;
13295 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13296 ctxt->catalogs = NULL;
13297 xmlInitNodeInfoSeq(&ctxt->node_seq);
13298
13299 if (ctxt->attsDefault != NULL) {
13300 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13301 ctxt->attsDefault = NULL;
13302 }
13303 if (ctxt->attsSpecial != NULL) {
13304 xmlHashFree(ctxt->attsSpecial, NULL);
13305 ctxt->attsSpecial = NULL;
13306 }
13307
Daniel Veillard4432df22003-09-28 18:58:27 +000013308#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013309 if (ctxt->catalogs != NULL)
13310 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013311#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013312 if (ctxt->lastError.code != XML_ERR_OK)
13313 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013314}
13315
13316/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013317 * xmlCtxtResetPush:
13318 * @ctxt: an XML parser context
13319 * @chunk: a pointer to an array of chars
13320 * @size: number of chars in the array
13321 * @filename: an optional file name or URI
13322 * @encoding: the document encoding, or NULL
13323 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013324 * Reset a push parser context
13325 *
13326 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013327 */
13328int
13329xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13330 int size, const char *filename, const char *encoding)
13331{
13332 xmlParserInputPtr inputStream;
13333 xmlParserInputBufferPtr buf;
13334 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13335
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013336 if (ctxt == NULL)
13337 return(1);
13338
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013339 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13340 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13341
13342 buf = xmlAllocParserInputBuffer(enc);
13343 if (buf == NULL)
13344 return(1);
13345
13346 if (ctxt == NULL) {
13347 xmlFreeParserInputBuffer(buf);
13348 return(1);
13349 }
13350
13351 xmlCtxtReset(ctxt);
13352
13353 if (ctxt->pushTab == NULL) {
13354 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13355 sizeof(xmlChar *));
13356 if (ctxt->pushTab == NULL) {
13357 xmlErrMemory(ctxt, NULL);
13358 xmlFreeParserInputBuffer(buf);
13359 return(1);
13360 }
13361 }
13362
13363 if (filename == NULL) {
13364 ctxt->directory = NULL;
13365 } else {
13366 ctxt->directory = xmlParserGetDirectory(filename);
13367 }
13368
13369 inputStream = xmlNewInputStream(ctxt);
13370 if (inputStream == NULL) {
13371 xmlFreeParserInputBuffer(buf);
13372 return(1);
13373 }
13374
13375 if (filename == NULL)
13376 inputStream->filename = NULL;
13377 else
13378 inputStream->filename = (char *)
13379 xmlCanonicPath((const xmlChar *) filename);
13380 inputStream->buf = buf;
13381 inputStream->base = inputStream->buf->buffer->content;
13382 inputStream->cur = inputStream->buf->buffer->content;
13383 inputStream->end =
13384 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13385
13386 inputPush(ctxt, inputStream);
13387
13388 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13389 (ctxt->input->buf != NULL)) {
13390 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13391 int cur = ctxt->input->cur - ctxt->input->base;
13392
13393 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13394
13395 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13396 ctxt->input->cur = ctxt->input->base + cur;
13397 ctxt->input->end =
13398 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13399 use];
13400#ifdef DEBUG_PUSH
13401 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13402#endif
13403 }
13404
13405 if (encoding != NULL) {
13406 xmlCharEncodingHandlerPtr hdlr;
13407
13408 hdlr = xmlFindCharEncodingHandler(encoding);
13409 if (hdlr != NULL) {
13410 xmlSwitchToEncoding(ctxt, hdlr);
13411 } else {
13412 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13413 "Unsupported encoding %s\n", BAD_CAST encoding);
13414 }
13415 } else if (enc != XML_CHAR_ENCODING_NONE) {
13416 xmlSwitchEncoding(ctxt, enc);
13417 }
13418
13419 return(0);
13420}
13421
13422/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013423 * xmlCtxtUseOptions:
13424 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013425 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013426 *
13427 * Applies the options to the parser context
13428 *
13429 * Returns 0 in case of success, the set of unknown or unimplemented options
13430 * in case of error.
13431 */
13432int
13433xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13434{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013435 if (ctxt == NULL)
13436 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013437 if (options & XML_PARSE_RECOVER) {
13438 ctxt->recovery = 1;
13439 options -= XML_PARSE_RECOVER;
13440 } else
13441 ctxt->recovery = 0;
13442 if (options & XML_PARSE_DTDLOAD) {
13443 ctxt->loadsubset = XML_DETECT_IDS;
13444 options -= XML_PARSE_DTDLOAD;
13445 } else
13446 ctxt->loadsubset = 0;
13447 if (options & XML_PARSE_DTDATTR) {
13448 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13449 options -= XML_PARSE_DTDATTR;
13450 }
13451 if (options & XML_PARSE_NOENT) {
13452 ctxt->replaceEntities = 1;
13453 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13454 options -= XML_PARSE_NOENT;
13455 } else
13456 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013457 if (options & XML_PARSE_PEDANTIC) {
13458 ctxt->pedantic = 1;
13459 options -= XML_PARSE_PEDANTIC;
13460 } else
13461 ctxt->pedantic = 0;
13462 if (options & XML_PARSE_NOBLANKS) {
13463 ctxt->keepBlanks = 0;
13464 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13465 options -= XML_PARSE_NOBLANKS;
13466 } else
13467 ctxt->keepBlanks = 1;
13468 if (options & XML_PARSE_DTDVALID) {
13469 ctxt->validate = 1;
13470 if (options & XML_PARSE_NOWARNING)
13471 ctxt->vctxt.warning = NULL;
13472 if (options & XML_PARSE_NOERROR)
13473 ctxt->vctxt.error = NULL;
13474 options -= XML_PARSE_DTDVALID;
13475 } else
13476 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013477 if (options & XML_PARSE_NOWARNING) {
13478 ctxt->sax->warning = NULL;
13479 options -= XML_PARSE_NOWARNING;
13480 }
13481 if (options & XML_PARSE_NOERROR) {
13482 ctxt->sax->error = NULL;
13483 ctxt->sax->fatalError = NULL;
13484 options -= XML_PARSE_NOERROR;
13485 }
Daniel Veillard81273902003-09-30 00:43:48 +000013486#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013487 if (options & XML_PARSE_SAX1) {
13488 ctxt->sax->startElement = xmlSAX2StartElement;
13489 ctxt->sax->endElement = xmlSAX2EndElement;
13490 ctxt->sax->startElementNs = NULL;
13491 ctxt->sax->endElementNs = NULL;
13492 ctxt->sax->initialized = 1;
13493 options -= XML_PARSE_SAX1;
13494 }
Daniel Veillard81273902003-09-30 00:43:48 +000013495#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013496 if (options & XML_PARSE_NODICT) {
13497 ctxt->dictNames = 0;
13498 options -= XML_PARSE_NODICT;
13499 } else {
13500 ctxt->dictNames = 1;
13501 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013502 if (options & XML_PARSE_NOCDATA) {
13503 ctxt->sax->cdataBlock = NULL;
13504 options -= XML_PARSE_NOCDATA;
13505 }
13506 if (options & XML_PARSE_NSCLEAN) {
13507 ctxt->options |= XML_PARSE_NSCLEAN;
13508 options -= XML_PARSE_NSCLEAN;
13509 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013510 if (options & XML_PARSE_NONET) {
13511 ctxt->options |= XML_PARSE_NONET;
13512 options -= XML_PARSE_NONET;
13513 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013514 if (options & XML_PARSE_COMPACT) {
13515 ctxt->options |= XML_PARSE_COMPACT;
13516 options -= XML_PARSE_COMPACT;
13517 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013518 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013519 return (options);
13520}
13521
13522/**
13523 * xmlDoRead:
13524 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013525 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013526 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013527 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013528 * @reuse: keep the context for reuse
13529 *
13530 * Common front-end for the xmlRead functions
13531 *
13532 * Returns the resulting document tree or NULL
13533 */
13534static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013535xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13536 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013537{
13538 xmlDocPtr ret;
13539
13540 xmlCtxtUseOptions(ctxt, options);
13541 if (encoding != NULL) {
13542 xmlCharEncodingHandlerPtr hdlr;
13543
13544 hdlr = xmlFindCharEncodingHandler(encoding);
13545 if (hdlr != NULL)
13546 xmlSwitchToEncoding(ctxt, hdlr);
13547 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013548 if ((URL != NULL) && (ctxt->input != NULL) &&
13549 (ctxt->input->filename == NULL))
13550 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013551 xmlParseDocument(ctxt);
13552 if ((ctxt->wellFormed) || ctxt->recovery)
13553 ret = ctxt->myDoc;
13554 else {
13555 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013556 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013557 xmlFreeDoc(ctxt->myDoc);
13558 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013559 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013560 ctxt->myDoc = NULL;
13561 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013562 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013563 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013564
13565 return (ret);
13566}
13567
13568/**
13569 * xmlReadDoc:
13570 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013571 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013572 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013573 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013574 *
13575 * parse an XML in-memory document and build a tree.
13576 *
13577 * Returns the resulting document tree
13578 */
13579xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013580xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013581{
13582 xmlParserCtxtPtr ctxt;
13583
13584 if (cur == NULL)
13585 return (NULL);
13586
13587 ctxt = xmlCreateDocParserCtxt(cur);
13588 if (ctxt == NULL)
13589 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013590 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013591}
13592
13593/**
13594 * xmlReadFile:
13595 * @filename: a file or URL
13596 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013597 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013598 *
13599 * parse an XML file from the filesystem or the network.
13600 *
13601 * Returns the resulting document tree
13602 */
13603xmlDocPtr
13604xmlReadFile(const char *filename, const char *encoding, int options)
13605{
13606 xmlParserCtxtPtr ctxt;
13607
Daniel Veillard61b93382003-11-03 14:28:31 +000013608 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013609 if (ctxt == NULL)
13610 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013611 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013612}
13613
13614/**
13615 * xmlReadMemory:
13616 * @buffer: a pointer to a char array
13617 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013618 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013619 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013620 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013621 *
13622 * parse an XML in-memory document and build a tree.
13623 *
13624 * Returns the resulting document tree
13625 */
13626xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013627xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013628{
13629 xmlParserCtxtPtr ctxt;
13630
13631 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13632 if (ctxt == NULL)
13633 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013634 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013635}
13636
13637/**
13638 * xmlReadFd:
13639 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013640 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013641 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013642 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013643 *
13644 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013645 * NOTE that the file descriptor will not be closed when the
13646 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013647 *
13648 * Returns the resulting document tree
13649 */
13650xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013651xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013652{
13653 xmlParserCtxtPtr ctxt;
13654 xmlParserInputBufferPtr input;
13655 xmlParserInputPtr stream;
13656
13657 if (fd < 0)
13658 return (NULL);
13659
13660 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13661 if (input == NULL)
13662 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013663 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013664 ctxt = xmlNewParserCtxt();
13665 if (ctxt == NULL) {
13666 xmlFreeParserInputBuffer(input);
13667 return (NULL);
13668 }
13669 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13670 if (stream == NULL) {
13671 xmlFreeParserInputBuffer(input);
13672 xmlFreeParserCtxt(ctxt);
13673 return (NULL);
13674 }
13675 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013676 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013677}
13678
13679/**
13680 * xmlReadIO:
13681 * @ioread: an I/O read function
13682 * @ioclose: an I/O close function
13683 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013684 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013685 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013686 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013687 *
13688 * parse an XML document from I/O functions and source and build a tree.
13689 *
13690 * Returns the resulting document tree
13691 */
13692xmlDocPtr
13693xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013694 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013695{
13696 xmlParserCtxtPtr ctxt;
13697 xmlParserInputBufferPtr input;
13698 xmlParserInputPtr stream;
13699
13700 if (ioread == NULL)
13701 return (NULL);
13702
13703 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13704 XML_CHAR_ENCODING_NONE);
13705 if (input == NULL)
13706 return (NULL);
13707 ctxt = xmlNewParserCtxt();
13708 if (ctxt == NULL) {
13709 xmlFreeParserInputBuffer(input);
13710 return (NULL);
13711 }
13712 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13713 if (stream == NULL) {
13714 xmlFreeParserInputBuffer(input);
13715 xmlFreeParserCtxt(ctxt);
13716 return (NULL);
13717 }
13718 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013719 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013720}
13721
13722/**
13723 * xmlCtxtReadDoc:
13724 * @ctxt: an XML parser context
13725 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013726 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013727 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013728 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013729 *
13730 * parse an XML in-memory document and build a tree.
13731 * This reuses the existing @ctxt parser context
13732 *
13733 * Returns the resulting document tree
13734 */
13735xmlDocPtr
13736xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013737 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013738{
13739 xmlParserInputPtr stream;
13740
13741 if (cur == NULL)
13742 return (NULL);
13743 if (ctxt == NULL)
13744 return (NULL);
13745
13746 xmlCtxtReset(ctxt);
13747
13748 stream = xmlNewStringInputStream(ctxt, cur);
13749 if (stream == NULL) {
13750 return (NULL);
13751 }
13752 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013753 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013754}
13755
13756/**
13757 * xmlCtxtReadFile:
13758 * @ctxt: an XML parser context
13759 * @filename: a file or URL
13760 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013761 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013762 *
13763 * parse an XML file from the filesystem or the network.
13764 * This reuses the existing @ctxt parser context
13765 *
13766 * Returns the resulting document tree
13767 */
13768xmlDocPtr
13769xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13770 const char *encoding, int options)
13771{
13772 xmlParserInputPtr stream;
13773
13774 if (filename == NULL)
13775 return (NULL);
13776 if (ctxt == NULL)
13777 return (NULL);
13778
13779 xmlCtxtReset(ctxt);
13780
Daniel Veillard29614c72004-11-26 10:47:26 +000013781 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013782 if (stream == NULL) {
13783 return (NULL);
13784 }
13785 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013786 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013787}
13788
13789/**
13790 * xmlCtxtReadMemory:
13791 * @ctxt: an XML parser context
13792 * @buffer: a pointer to a char array
13793 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013794 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013795 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013796 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013797 *
13798 * parse an XML in-memory document and build a tree.
13799 * This reuses the existing @ctxt parser context
13800 *
13801 * Returns the resulting document tree
13802 */
13803xmlDocPtr
13804xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013805 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013806{
13807 xmlParserInputBufferPtr input;
13808 xmlParserInputPtr stream;
13809
13810 if (ctxt == NULL)
13811 return (NULL);
13812 if (buffer == NULL)
13813 return (NULL);
13814
13815 xmlCtxtReset(ctxt);
13816
13817 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13818 if (input == NULL) {
13819 return(NULL);
13820 }
13821
13822 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13823 if (stream == NULL) {
13824 xmlFreeParserInputBuffer(input);
13825 return(NULL);
13826 }
13827
13828 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013829 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013830}
13831
13832/**
13833 * xmlCtxtReadFd:
13834 * @ctxt: an XML parser context
13835 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013836 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013837 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013838 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013839 *
13840 * parse an XML from a file descriptor and build a tree.
13841 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013842 * NOTE that the file descriptor will not be closed when the
13843 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013844 *
13845 * Returns the resulting document tree
13846 */
13847xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013848xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13849 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013850{
13851 xmlParserInputBufferPtr input;
13852 xmlParserInputPtr stream;
13853
13854 if (fd < 0)
13855 return (NULL);
13856 if (ctxt == NULL)
13857 return (NULL);
13858
13859 xmlCtxtReset(ctxt);
13860
13861
13862 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13863 if (input == NULL)
13864 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013865 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013866 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13867 if (stream == NULL) {
13868 xmlFreeParserInputBuffer(input);
13869 return (NULL);
13870 }
13871 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013872 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013873}
13874
13875/**
13876 * xmlCtxtReadIO:
13877 * @ctxt: an XML parser context
13878 * @ioread: an I/O read function
13879 * @ioclose: an I/O close function
13880 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013881 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013882 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013883 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013884 *
13885 * parse an XML document from I/O functions and source and build a tree.
13886 * This reuses the existing @ctxt parser context
13887 *
13888 * Returns the resulting document tree
13889 */
13890xmlDocPtr
13891xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13892 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013893 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013894 const char *encoding, int options)
13895{
13896 xmlParserInputBufferPtr input;
13897 xmlParserInputPtr stream;
13898
13899 if (ioread == NULL)
13900 return (NULL);
13901 if (ctxt == NULL)
13902 return (NULL);
13903
13904 xmlCtxtReset(ctxt);
13905
13906 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13907 XML_CHAR_ENCODING_NONE);
13908 if (input == NULL)
13909 return (NULL);
13910 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13911 if (stream == NULL) {
13912 xmlFreeParserInputBuffer(input);
13913 return (NULL);
13914 }
13915 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013916 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013917}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013918
13919#define bottom_parser
13920#include "elfgcchack.h"