blob: 27db3c625aee2025650d8d7014bbf8b86d97bea6 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * accepts to process. It is a safety boundary, not an inherent limit
 * of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
#define SAX2 1

/* Sizes (in bytes or items, depending on the use site) of parser buffers. */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * NOTE(review): presumably used to tag documents built in SAX
 * compatibility mode -- confirm at the use sites.
 */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated. Both the strings and the array slots are
 * const: the table is read-only data.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000129static int
130xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
131
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132/************************************************************************
133 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000134 * Some factorized error routines *
135 * *
136 ************************************************************************/
137
138/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000139 * xmlErrAttributeDup:
140 * @ctxt: an XML parser context
141 * @prefix: the attribute prefix
142 * @localname: the attribute localname
143 *
144 * Handle a redefinition of attribute error
145 */
146static void
147xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
148 const xmlChar * localname)
149{
Daniel Veillard157fee02003-10-31 10:36:03 +0000150 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
151 (ctxt->instate == XML_PARSER_EOF))
152 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000153 if (ctxt != NULL)
154 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000155 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000156 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000157 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
158 (const char *) localname, NULL, NULL, 0, 0,
159 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000160 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000161 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000162 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
163 (const char *) prefix, (const char *) localname,
164 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
165 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000166 if (ctxt != NULL) {
167 ctxt->wellFormed = 0;
168 if (ctxt->recovery == 0)
169 ctxt->disableSAX = 1;
170 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171}
172
173/**
174 * xmlFatalErr:
175 * @ctxt: an XML parser context
176 * @error: the error number
177 * @extra: extra information string
178 *
179 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
180 */
181static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183{
184 const char *errmsg;
185
Daniel Veillard157fee02003-10-31 10:36:03 +0000186 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
187 (ctxt->instate == XML_PARSER_EOF))
188 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 switch (error) {
190 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid hexadecimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid decimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "internal error";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference at end of document\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in prolog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in epilog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: no name\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: expecting ';'\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Detected an entity reference loop\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "PEReferences forbidden in internal subset\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "EntityValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "AttValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unescaped '<' not allowed in attributes values\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SystemLiteral \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Unfinished System or Public ID \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Sequence ']]>' not allowed in content\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "PUBLIC, the Public Identifier is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Comment must not contain '--' (double-hyphen)\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "xmlParsePI : no target name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Invalid PI name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NOTATION: Name expected here\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'>' required to close NOTATION declaration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Entity value required\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Fragment not allowed";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "'(' required to start ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "NmToken expected in ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "')' required to finish ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : Name or '(' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg =
294 "PEReference: forbidden within markup decl in internal subset\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "expected '>'\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "XML conditional section '[' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "Content error in the external subset\n";
304 break;
305 case XML_ERR_CONDSEC_INVALID_KEYWORD:
306 errmsg =
307 "conditional section INCLUDE or IGNORE keyword expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "XML conditional section not closed\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "Text declaration '<?xml' required\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "parsing XML declaration: '?>' expected\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "external parsed entities cannot be standalone\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EntityRef: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "DOCTYPE improperly terminated\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EndTag: '</' not found\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "expected '='\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not closed expecting \" or '\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not started expecting ' or \"\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Invalid XML encoding name\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "standalone accepts only 'yes' or 'no'\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Document is empty\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Extra content at the end of the document\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "chunk is not well balanced\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "extra content at the end of well balanced chunk\n";
356 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000357 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Malformed declaration expecting version\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 case:
362 errmsg = "\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 default:
366 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000368 if (ctxt != NULL)
369 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
372 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000380/**
381 * xmlFatalErrMsg:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @msg: the error message
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000391{
Daniel Veillard157fee02003-10-31 10:36:03 +0000392 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393 (ctxt->instate == XML_PARSER_EOF))
394 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000395 if (ctxt != NULL)
396 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000397 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL) {
400 ctxt->wellFormed = 0;
401 if (ctxt->recovery == 0)
402 ctxt->disableSAX = 1;
403 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000404}
405
406/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000407 * xmlWarningMsg:
408 * @ctxt: an XML parser context
409 * @error: the error number
410 * @msg: the error message
411 * @str1: extra data
412 * @str2: extra data
413 *
414 * Handle a warning.
415 */
416static void
417xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
418 const char *msg, const xmlChar *str1, const xmlChar *str2)
419{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000420 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000421
Daniel Veillard157fee02003-10-31 10:36:03 +0000422 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
423 (ctxt->instate == XML_PARSER_EOF))
424 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000425 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
426 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000427 schannel = ctxt->sax->serror;
428 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000429 (ctxt->sax) ? ctxt->sax->warning : NULL,
430 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000431 ctxt, NULL, XML_FROM_PARSER, error,
432 XML_ERR_WARNING, NULL, 0,
433 (const char *) str1, (const char *) str2, NULL, 0, 0,
434 msg, (const char *) str1, (const char *) str2);
435}
436
437/**
438 * xmlValidityError:
439 * @ctxt: an XML parser context
440 * @error: the error number
441 * @msg: the error message
442 * @str1: extra data
443 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000444 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 */
446static void
447xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
448 const char *msg, const xmlChar *str1)
449{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000450 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000451
452 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
453 (ctxt->instate == XML_PARSER_EOF))
454 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000455 if (ctxt != NULL) {
456 ctxt->errNo = error;
457 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
458 schannel = ctxt->sax->serror;
459 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000460 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000461 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000462 ctxt, NULL, XML_FROM_DTD, error,
463 XML_ERR_ERROR, NULL, 0, (const char *) str1,
464 NULL, NULL, 0, 0,
465 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL) {
467 ctxt->valid = 0;
468 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000469}
470
471/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 * xmlFatalErrMsgInt:
473 * @ctxt: an XML parser context
474 * @error: the error number
475 * @msg: the error message
476 * @val: an integer value
477 *
478 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
479 */
480static void
481xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483{
Daniel Veillard157fee02003-10-31 10:36:03 +0000484 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
485 (ctxt->instate == XML_PARSER_EOF))
486 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000487 if (ctxt != NULL)
488 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000489 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000490 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
491 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000492 if (ctxt != NULL) {
493 ctxt->wellFormed = 0;
494 if (ctxt->recovery == 0)
495 ctxt->disableSAX = 1;
496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000497}
498
499/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 * xmlFatalErrMsgStrIntStr:
501 * @ctxt: an XML parser context
502 * @error: the error number
503 * @msg: the error message
504 * @str1: an string info
505 * @val: an integer value
506 * @str2: an string info
507 *
508 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
509 */
510static void
511xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, int val,
513 const xmlChar *str2)
514{
Daniel Veillard157fee02003-10-31 10:36:03 +0000515 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
516 (ctxt->instate == XML_PARSER_EOF))
517 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000518 if (ctxt != NULL)
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
522 NULL, 0, (const char *) str1, (const char *) str2,
523 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if (ctxt != NULL) {
525 ctxt->wellFormed = 0;
526 if (ctxt->recovery == 0)
527 ctxt->disableSAX = 1;
528 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000529}
530
531/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000532 * xmlFatalErrMsgStr:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @val: a string value
537 *
538 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
539 */
540static void
541xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000543{
Daniel Veillard157fee02003-10-31 10:36:03 +0000544 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545 (ctxt->instate == XML_PARSER_EOF))
546 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000547 if (ctxt != NULL)
548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 XML_FROM_PARSER, error, XML_ERR_FATAL,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->wellFormed = 0;
555 if (ctxt->recovery == 0)
556 ctxt->disableSAX = 1;
557 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000558}
559
560/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000561 * xmlErrMsgStr:
562 * @ctxt: an XML parser context
563 * @error: the error number
564 * @msg: the error message
565 * @val: a string value
566 *
567 * Handle a non fatal parser error
568 */
569static void
570xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571 const char *msg, const xmlChar * val)
572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL)
577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000579 XML_FROM_PARSER, error, XML_ERR_ERROR,
580 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
581 val);
582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlNsErr:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the message
589 * @info1: extra information string
590 * @info2: extra information string
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594static void
595xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000597 const xmlChar * info1, const xmlChar * info2,
598 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000599{
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if (ctxt != NULL)
604 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000605 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000606 XML_ERR_ERROR, NULL, 0, (const char *) info1,
607 (const char *) info2, (const char *) info3, 0, 0, msg,
608 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL)
610 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000611}
612
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000613/************************************************************************
614 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000615 * Library wide options *
616 * *
617 ************************************************************************/
618
619/**
620 * xmlHasFeature:
621 * @feature: the feature to be examined
622 *
623 * Examines if the library has been compiled with a given feature.
624 *
 * Returns a non-zero value if the feature exists, and zero (0) if the
 * feature does not exist or an unknown feature is requested.
628 */
629int
630xmlHasFeature(xmlFeature feature)
631{
632 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000633 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000634#ifdef LIBXML_THREAD_ENABLED
635 return(1);
636#else
637 return(0);
638#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000639 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000640#ifdef LIBXML_TREE_ENABLED
641 return(1);
642#else
643 return(0);
644#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000645 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646#ifdef LIBXML_OUTPUT_ENABLED
647 return(1);
648#else
649 return(0);
650#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000651 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000652#ifdef LIBXML_PUSH_ENABLED
653 return(1);
654#else
655 return(0);
656#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000657 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000658#ifdef LIBXML_READER_ENABLED
659 return(1);
660#else
661 return(0);
662#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_PATTERN_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_WRITER_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_SAX1_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_FTP_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_HTTP_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_VALID_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_HTML_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_LEGACY_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_C14N_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_CATALOG_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_XPATH_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_XPTR_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_XINCLUDE_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_ICONV_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_ISO8859X_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_UNICODE_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_REGEXP_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_AUTOMATA_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_EXPR_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_SCHEMAS_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_SCHEMATRON_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_MODULES_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_DEBUG_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef DEBUG_MEMORY_LOCATION
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_DEBUG_RUNTIME
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000813 case XML_WITH_ZLIB:
814#ifdef LIBXML_ZLIB_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819 default:
820 break;
821 }
822 return(0);
823}
824
825/************************************************************************
826 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000827 * SAX2 defaulted attributes handling *
828 * *
829 ************************************************************************/
830
831/**
832 * xmlDetectSAX2:
833 * @ctxt: an XML parser context
834 *
835 * Do the SAX2 detection and specific intialization
836 */
837static void
838xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
839 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000841 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
842 ((ctxt->sax->startElementNs != NULL) ||
843 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000844#else
845 ctxt->sax2 = 1;
846#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847
848 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
849 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
850 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000851 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
852 (ctxt->str_xml_ns == NULL)) {
853 xmlErrMemory(ctxt, NULL);
854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855}
856
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857typedef struct _xmlDefAttrs xmlDefAttrs;
858typedef xmlDefAttrs *xmlDefAttrsPtr;
859struct _xmlDefAttrs {
860 int nbAttrs; /* number of defaulted attributes on that element */
861 int maxAttrs; /* the size of the array */
862 const xmlChar *values[4]; /* array of localname/prefix/values */
863};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864
865/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000866 * xmlAttrNormalizeSpace:
867 * @src: the source string
868 * @dst: the target string
869 *
870 * Normalize the space in non CDATA attribute values:
871 * If the attribute type is not CDATA, then the XML processor MUST further
872 * process the normalized attribute value by discarding any leading and
873 * trailing space (#x20) characters, and by replacing sequences of space
874 * (#x20) characters by a single space (#x20) character.
875 * Note that the size of dst need to be at least src, and if one doesn't need
876 * to preserve dst (and it doesn't come from a dictionary or read-only) then
877 * passing src as dst is just fine.
878 *
879 * Returns a pointer to the normalized value (dst) or NULL if no conversion
880 * is needed.
881 */
882static xmlChar *
883xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
884{
885 if ((src == NULL) || (dst == NULL))
886 return(NULL);
887
888 while (*src == 0x20) src++;
889 while (*src != 0) {
890 if (*src == 0x20) {
891 while (*src == 0x20) src++;
892 if (*src != 0)
893 *dst++ = 0x20;
894 } else {
895 *dst++ = *src++;
896 }
897 }
898 *dst = 0;
899 if (dst == src)
900 return(NULL);
901 return(dst);
902}
903
904/**
905 * xmlAttrNormalizeSpace2:
906 * @src: the source string
907 *
908 * Normalize the space in non CDATA attribute values, a slightly more complex
909 * front end to avoid allocation problems when running on attribute values
910 * coming from the input.
911 *
912 * Returns a pointer to the normalized value (dst) or NULL if no conversion
913 * is needed.
914 */
915static const xmlChar *
916xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
917{
918 int i;
919 int remove_head = 0;
920 int need_realloc = 0;
921 const xmlChar *cur;
922
923 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
924 return(NULL);
925 i = *len;
926 if (i <= 0)
927 return(NULL);
928
929 cur = src;
930 while (*cur == 0x20) {
931 cur++;
932 remove_head++;
933 }
934 while (*cur != 0) {
935 if (*cur == 0x20) {
936 cur++;
937 if ((*cur == 0x20) || (*cur == 0)) {
938 need_realloc = 1;
939 break;
940 }
941 } else
942 cur++;
943 }
944 if (need_realloc) {
945 xmlChar *ret;
946
947 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
948 if (ret == NULL) {
949 xmlErrMemory(ctxt, NULL);
950 return(NULL);
951 }
952 xmlAttrNormalizeSpace(ret, ret);
953 *len = (int) strlen((const char *)ret);
954 return(ret);
955 } else if (remove_head) {
956 *len -= remove_head;
957 return(src + remove_head);
958 }
959 return(NULL);
960}
961
962/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000963 * xmlAddDefAttrs:
964 * @ctxt: an XML parser context
965 * @fullname: the element fullname
966 * @fullattr: the attribute fullname
967 * @value: the attribute value
968 *
969 * Add a defaulted attribute for an element
970 */
971static void
972xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
973 const xmlChar *fullname,
974 const xmlChar *fullattr,
975 const xmlChar *value) {
976 xmlDefAttrsPtr defaults;
977 int len;
978 const xmlChar *name;
979 const xmlChar *prefix;
980
Daniel Veillard6a31b832008-03-26 14:06:44 +0000981 /*
982 * Allows to detect attribute redefinitions
983 */
984 if (ctxt->attsSpecial != NULL) {
985 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
986 return;
987 }
988
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000990 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991 if (ctxt->attsDefault == NULL)
992 goto mem_error;
993 }
994
995 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000996 * split the element name into prefix:localname , the string found
997 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000998 */
999 name = xmlSplitQName3(fullname, &len);
1000 if (name == NULL) {
1001 name = xmlDictLookup(ctxt->dict, fullname, -1);
1002 prefix = NULL;
1003 } else {
1004 name = xmlDictLookup(ctxt->dict, name, -1);
1005 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1006 }
1007
1008 /*
1009 * make sure there is some storage
1010 */
1011 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1012 if (defaults == NULL) {
1013 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001014 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015 if (defaults == NULL)
1016 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001018 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001019 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1020 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001021 xmlDefAttrsPtr temp;
1022
1023 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001025 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001026 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001027 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 defaults->maxAttrs *= 2;
1029 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1030 }
1031
1032 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001033 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 * are within the DTD and hen not associated to namespace names.
1035 */
1036 name = xmlSplitQName3(fullattr, &len);
1037 if (name == NULL) {
1038 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1039 prefix = NULL;
1040 } else {
1041 name = xmlDictLookup(ctxt->dict, name, -1);
1042 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1043 }
1044
1045 defaults->values[4 * defaults->nbAttrs] = name;
1046 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1047 /* intern the string and precompute the end */
1048 len = xmlStrlen(value);
1049 value = xmlDictLookup(ctxt->dict, value, len);
1050 defaults->values[4 * defaults->nbAttrs + 2] = value;
1051 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1052 defaults->nbAttrs++;
1053
1054 return;
1055
1056mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001057 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001058 return;
1059}
1060
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001061/**
1062 * xmlAddSpecialAttr:
1063 * @ctxt: an XML parser context
1064 * @fullname: the element fullname
1065 * @fullattr: the attribute fullname
1066 * @type: the attribute type
1067 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001068 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001069 */
1070static void
1071xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1072 const xmlChar *fullname,
1073 const xmlChar *fullattr,
1074 int type)
1075{
1076 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001077 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001078 if (ctxt->attsSpecial == NULL)
1079 goto mem_error;
1080 }
1081
Daniel Veillardac4118d2008-01-11 05:27:32 +00001082 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1083 return;
1084
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001085 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1086 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001087 return;
1088
1089mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001090 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001091 return;
1092}
1093
Daniel Veillard4432df22003-09-28 18:58:27 +00001094/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001095 * xmlCleanSpecialAttrCallback:
1096 *
1097 * Removes CDATA attributes from the special attribute table
1098 */
1099static void
1100xmlCleanSpecialAttrCallback(void *payload, void *data,
1101 const xmlChar *fullname, const xmlChar *fullattr,
1102 const xmlChar *unused ATTRIBUTE_UNUSED) {
1103 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1104
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001105 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001106 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1107 }
1108}
1109
1110/**
1111 * xmlCleanSpecialAttr:
1112 * @ctxt: an XML parser context
1113 *
1114 * Trim the list of attributes defined to remove all those of type
1115 * CDATA as they are not special. This call should be done when finishing
1116 * to parse the DTD and before starting to parse the document root.
1117 */
1118static void
1119xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1120{
1121 if (ctxt->attsSpecial == NULL)
1122 return;
1123
1124 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1125
1126 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1127 xmlHashFree(ctxt->attsSpecial, NULL);
1128 ctxt->attsSpecial = NULL;
1129 }
1130 return;
1131}
1132
1133/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001134 * xmlCheckLanguageID:
1135 * @lang: pointer to the string value
1136 *
1137 * Checks that the value conforms to the LanguageID production:
1138 *
1139 * NOTE: this is somewhat deprecated, those productions were removed from
1140 * the XML Second edition.
1141 *
1142 * [33] LanguageID ::= Langcode ('-' Subcode)*
1143 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1144 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1145 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1146 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1147 * [38] Subcode ::= ([a-z] | [A-Z])+
1148 *
1149 * Returns 1 if correct 0 otherwise
1150 **/
1151int
1152xmlCheckLanguageID(const xmlChar * lang)
1153{
1154 const xmlChar *cur = lang;
1155
1156 if (cur == NULL)
1157 return (0);
1158 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1159 ((cur[0] == 'I') && (cur[1] == '-'))) {
1160 /*
1161 * IANA code
1162 */
1163 cur += 2;
1164 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1165 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1166 cur++;
1167 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1168 ((cur[0] == 'X') && (cur[1] == '-'))) {
1169 /*
1170 * User code
1171 */
1172 cur += 2;
1173 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1174 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1175 cur++;
1176 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1177 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1178 /*
1179 * ISO639
1180 */
1181 cur++;
1182 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1183 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1184 cur++;
1185 else
1186 return (0);
1187 } else
1188 return (0);
1189 while (cur[0] != 0) { /* non input consuming */
1190 if (cur[0] != '-')
1191 return (0);
1192 cur++;
1193 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1194 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1195 cur++;
1196 else
1197 return (0);
1198 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1199 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1200 cur++;
1201 }
1202 return (1);
1203}
1204
Owen Taylor3473f882001-02-23 17:55:21 +00001205/************************************************************************
1206 * *
1207 * Parser stacks related functions and macros *
1208 * *
1209 ************************************************************************/
1210
1211xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1212 const xmlChar ** str);
1213
Daniel Veillard0fb18932003-09-07 09:14:37 +00001214#ifdef SAX2
1215/**
1216 * nsPush:
1217 * @ctxt: an XML parser context
1218 * @prefix: the namespace prefix or NULL
1219 * @URL: the namespace name
1220 *
1221 * Pushes a new parser namespace on top of the ns stack
1222 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001223 * Returns -1 in case of error, -2 if the namespace should be discarded
1224 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001225 */
1226static int
1227nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1228{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001229 if (ctxt->options & XML_PARSE_NSCLEAN) {
1230 int i;
1231 for (i = 0;i < ctxt->nsNr;i += 2) {
1232 if (ctxt->nsTab[i] == prefix) {
1233 /* in scope */
1234 if (ctxt->nsTab[i + 1] == URL)
1235 return(-2);
1236 /* out of scope keep it */
1237 break;
1238 }
1239 }
1240 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001241 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1242 ctxt->nsMax = 10;
1243 ctxt->nsNr = 0;
1244 ctxt->nsTab = (const xmlChar **)
1245 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1246 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001247 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001248 ctxt->nsMax = 0;
1249 return (-1);
1250 }
1251 } else if (ctxt->nsNr >= ctxt->nsMax) {
1252 ctxt->nsMax *= 2;
1253 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001254 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001255 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1256 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001257 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001258 ctxt->nsMax /= 2;
1259 return (-1);
1260 }
1261 }
1262 ctxt->nsTab[ctxt->nsNr++] = prefix;
1263 ctxt->nsTab[ctxt->nsNr++] = URL;
1264 return (ctxt->nsNr);
1265}
1266/**
1267 * nsPop:
1268 * @ctxt: an XML parser context
1269 * @nr: the number to pop
1270 *
1271 * Pops the top @nr parser prefix/namespace from the ns stack
1272 *
1273 * Returns the number of namespaces removed
1274 */
1275static int
1276nsPop(xmlParserCtxtPtr ctxt, int nr)
1277{
1278 int i;
1279
1280 if (ctxt->nsTab == NULL) return(0);
1281 if (ctxt->nsNr < nr) {
1282 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1283 nr = ctxt->nsNr;
1284 }
1285 if (ctxt->nsNr <= 0)
1286 return (0);
1287
1288 for (i = 0;i < nr;i++) {
1289 ctxt->nsNr--;
1290 ctxt->nsTab[ctxt->nsNr] = NULL;
1291 }
1292 return(nr);
1293}
1294#endif
1295
1296static int
1297xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1298 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001300 int maxatts;
1301
1302 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001303 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001304 atts = (const xmlChar **)
1305 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001306 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001307 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1309 if (attallocs == NULL) goto mem_error;
1310 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001311 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001312 } else if (nr + 5 > ctxt->maxatts) {
1313 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001314 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1315 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001316 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001317 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001318 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1319 (maxatts / 5) * sizeof(int));
1320 if (attallocs == NULL) goto mem_error;
1321 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001322 ctxt->maxatts = maxatts;
1323 }
1324 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001325mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001326 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001327 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001328}
1329
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001330/**
1331 * inputPush:
1332 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001333 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001334 *
1335 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001336 *
1337 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001338 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001339int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1341{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001342 if ((ctxt == NULL) || (value == NULL))
1343 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001344 if (ctxt->inputNr >= ctxt->inputMax) {
1345 ctxt->inputMax *= 2;
1346 ctxt->inputTab =
1347 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1348 ctxt->inputMax *
1349 sizeof(ctxt->inputTab[0]));
1350 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001351 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001352 return (0);
1353 }
1354 }
1355 ctxt->inputTab[ctxt->inputNr] = value;
1356 ctxt->input = value;
1357 return (ctxt->inputNr++);
1358}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001359/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001360 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001361 * @ctxt: an XML parser context
1362 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001363 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001364 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001365 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001366 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001367xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001368inputPop(xmlParserCtxtPtr ctxt)
1369{
1370 xmlParserInputPtr ret;
1371
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001372 if (ctxt == NULL)
1373 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001374 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001375 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001376 ctxt->inputNr--;
1377 if (ctxt->inputNr > 0)
1378 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1379 else
1380 ctxt->input = NULL;
1381 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001382 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 return (ret);
1384}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001385/**
1386 * nodePush:
1387 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001388 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001389 *
1390 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001391 *
1392 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001393 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001394int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001395nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1396{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001397 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001398 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001399 xmlNodePtr *tmp;
1400
1401 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1402 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001404 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001405 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001406 return (0);
1407 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001408 ctxt->nodeTab = tmp;
1409 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001410 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001411 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001412 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001413 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1414 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001415 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001416 return(0);
1417 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001418 ctxt->nodeTab[ctxt->nodeNr] = value;
1419 ctxt->node = value;
1420 return (ctxt->nodeNr++);
1421}
1422/**
1423 * nodePop:
1424 * @ctxt: an XML parser context
1425 *
1426 * Pops the top element node from the node stack
1427 *
1428 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001429 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001430xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001431nodePop(xmlParserCtxtPtr ctxt)
1432{
1433 xmlNodePtr ret;
1434
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001435 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001436 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001437 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001438 ctxt->nodeNr--;
1439 if (ctxt->nodeNr > 0)
1440 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1441 else
1442 ctxt->node = NULL;
1443 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001444 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445 return (ret);
1446}
Daniel Veillarda2351322004-06-27 12:08:10 +00001447
1448#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001449/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001450 * nameNsPush:
1451 * @ctxt: an XML parser context
1452 * @value: the element name
1453 * @prefix: the element prefix
1454 * @URI: the element namespace name
1455 *
1456 * Pushes a new element name/prefix/URL on top of the name stack
1457 *
1458 * Returns -1 in case of error, the index in the stack otherwise
1459 */
1460static int
1461nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1462 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1463{
1464 if (ctxt->nameNr >= ctxt->nameMax) {
1465 const xmlChar * *tmp;
1466 void **tmp2;
1467 ctxt->nameMax *= 2;
1468 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1469 ctxt->nameMax *
1470 sizeof(ctxt->nameTab[0]));
1471 if (tmp == NULL) {
1472 ctxt->nameMax /= 2;
1473 goto mem_error;
1474 }
1475 ctxt->nameTab = tmp;
1476 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1477 ctxt->nameMax * 3 *
1478 sizeof(ctxt->pushTab[0]));
1479 if (tmp2 == NULL) {
1480 ctxt->nameMax /= 2;
1481 goto mem_error;
1482 }
1483 ctxt->pushTab = tmp2;
1484 }
1485 ctxt->nameTab[ctxt->nameNr] = value;
1486 ctxt->name = value;
1487 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1488 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001489 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001490 return (ctxt->nameNr++);
1491mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001492 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001493 return (-1);
1494}
1495/**
1496 * nameNsPop:
1497 * @ctxt: an XML parser context
1498 *
1499 * Pops the top element/prefix/URI name from the name stack
1500 *
1501 * Returns the name just removed
1502 */
1503static const xmlChar *
1504nameNsPop(xmlParserCtxtPtr ctxt)
1505{
1506 const xmlChar *ret;
1507
1508 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001509 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001510 ctxt->nameNr--;
1511 if (ctxt->nameNr > 0)
1512 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1513 else
1514 ctxt->name = NULL;
1515 ret = ctxt->nameTab[ctxt->nameNr];
1516 ctxt->nameTab[ctxt->nameNr] = NULL;
1517 return (ret);
1518}
Daniel Veillarda2351322004-06-27 12:08:10 +00001519#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001520
1521/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001522 * namePush:
1523 * @ctxt: an XML parser context
1524 * @value: the element name
1525 *
1526 * Pushes a new element name on top of the name stack
1527 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001528 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001529 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001530int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001531namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001532{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001533 if (ctxt == NULL) return (-1);
1534
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001536 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001537 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001538 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001539 ctxt->nameMax *
1540 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001541 if (tmp == NULL) {
1542 ctxt->nameMax /= 2;
1543 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001544 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001545 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001546 }
1547 ctxt->nameTab[ctxt->nameNr] = value;
1548 ctxt->name = value;
1549 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001550mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001551 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001552 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553}
1554/**
1555 * namePop:
1556 * @ctxt: an XML parser context
1557 *
1558 * Pops the top element name from the name stack
1559 *
1560 * Returns the name just removed
1561 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001562const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001563namePop(xmlParserCtxtPtr ctxt)
1564{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001565 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001566
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001567 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1568 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001569 ctxt->nameNr--;
1570 if (ctxt->nameNr > 0)
1571 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1572 else
1573 ctxt->name = NULL;
1574 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001575 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 return (ret);
1577}
Owen Taylor3473f882001-02-23 17:55:21 +00001578
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001579static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001580 if (ctxt->spaceNr >= ctxt->spaceMax) {
1581 ctxt->spaceMax *= 2;
1582 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1583 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1584 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001586 return(0);
1587 }
1588 }
1589 ctxt->spaceTab[ctxt->spaceNr] = val;
1590 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1591 return(ctxt->spaceNr++);
1592}
1593
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001594static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 int ret;
1596 if (ctxt->spaceNr <= 0) return(0);
1597 ctxt->spaceNr--;
1598 if (ctxt->spaceNr > 0)
1599 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1600 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001601 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001602 ret = ctxt->spaceTab[ctxt->spaceNr];
1603 ctxt->spaceTab[ctxt->spaceNr] = -1;
1604 return(ret);
1605}
1606
1607/*
1608 * Macros for accessing the content. Those should be used only by the parser,
1609 * and not exported.
1610 *
1611 * Dirty macros, i.e. one often need to make assumption on the context to
1612 * use them
1613 *
1614 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1615 * To be used with extreme caution since operations consuming
1616 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1621 * RAW same as CUR but in the input buffer, bypass any token
1622 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
1625 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001626 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001629 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1630 *
1631 * NEXT Skip to the next character, this does the proper decoding
1632 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001633 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001634 * CUR_CHAR(l) returns the current unicode character (int), set l
1635 * to the number of xmlChars used for the encoding [0-5].
1636 * CUR_SCHAR same but operate on a string instead of the context
1637 * COPY_BUF copy the current unicode char to the target buffer, increment
1638 * the index
1639 * GROW, SHRINK handling of input buffers
1640 */
1641
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * exactly c1 ... cn. The bytes are compared as unsigned char values.
 * Every argument is parenthesized in the expansion so that a compound
 * expression (pointer arithmetic, conditional, ...) can be passed as s
 * or as one of the characters without the cast or the comparison binding
 * to only a part of it.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1664
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the character and column counters. No line accounting is done.
 * A '%' found at the new position is handed to
 * xmlParserHandlePEReference(); when the new position holds a 0 and the
 * input cannot be grown, the input is popped.
 * Note that val is evaluated more than once.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that the
 * line and column counters are kept right across newlines.
 * val is parenthesized in the loop bound so that an expression argument
 * is compared as a whole; it is evaluated on every iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1687
Daniel Veillarda880b122003-04-21 21:36:41 +00001688#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001689 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1690 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001691 xmlSHRINK (ctxt);
1692
1693static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1694 xmlParserInputShrink(ctxt->input);
1695 if ((*ctxt->input->cur == 0) &&
1696 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1697 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001698 }
Owen Taylor3473f882001-02-23 17:55:21 +00001699
Daniel Veillarda880b122003-04-21 21:36:41 +00001700#define GROW if ((ctxt->progressive == 0) && \
1701 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001702 xmlGROW (ctxt);
1703
1704static void xmlGROW (xmlParserCtxtPtr ctxt) {
1705 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1706 if ((*ctxt->input->cur == 0) &&
1707 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1708 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001709}
Owen Taylor3473f882001-02-23 17:55:21 +00001710
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting one column and one
 * character, and try to grow the input if the new position holds a 0.
 * Expands to a brace block, not to a do { } while (0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating the line/column counters, and hand a '%' found at the new
 * position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* l must be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a character of length 1 is stored directly, any other goes
 * through xmlCopyCharMultiByte().
 * Expands to a bare if/else statement without its final semicolon. The
 * arguments are parenthesized so that expressions can be passed safely;
 * i must be an lvalue.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1738/**
1739 * xmlSkipBlankChars:
1740 * @ctxt: the XML parser context
1741 *
1742 * skip all blanks character found at that point in the input streams.
1743 * It pops up finished entities in the process if allowable at that point.
1744 *
1745 * Returns the number of space chars skipped
1746 */
1747
1748int
1749xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001750 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001751
1752 /*
1753 * It's Okay to use CUR/NEXT here since all the blanks are on
1754 * the ASCII range.
1755 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001756 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1757 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001758 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001759 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001760 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001761 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001762 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001763 if (*cur == '\n') {
1764 ctxt->input->line++; ctxt->input->col = 1;
1765 }
1766 cur++;
1767 res++;
1768 if (*cur == 0) {
1769 ctxt->input->cur = cur;
1770 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1771 cur = ctxt->input->cur;
1772 }
1773 }
1774 ctxt->input->cur = cur;
1775 } else {
1776 int cur;
1777 do {
1778 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001779 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001780 NEXT;
1781 cur = CUR;
1782 res++;
1783 }
1784 while ((cur == 0) && (ctxt->inputNr > 1) &&
1785 (ctxt->instate != XML_PARSER_COMMENT)) {
1786 xmlPopInput(ctxt);
1787 cur = CUR;
1788 }
1789 /*
1790 * Need to handle support of entities branching here
1791 */
1792 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1793 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1794 }
Owen Taylor3473f882001-02-23 17:55:21 +00001795 return(res);
1796}
1797
1798/************************************************************************
1799 * *
1800 * Commodity functions to handle entities *
1801 * *
1802 ************************************************************************/
1803
1804/**
1805 * xmlPopInput:
1806 * @ctxt: an XML parser context
1807 *
1808 * xmlPopInput: the current input pointed by ctxt->input came to an end
1809 * pop it and return the next char.
1810 *
1811 * Returns the current xmlChar in the parser context
1812 */
1813xmlChar
1814xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001815 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001816 if (xmlParserDebugEntities)
1817 xmlGenericError(xmlGenericErrorContext,
1818 "Popping input %d\n", ctxt->inputNr);
1819 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001820 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001821 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1822 return(xmlPopInput(ctxt));
1823 return(CUR);
1824}
1825
1826/**
1827 * xmlPushInput:
1828 * @ctxt: an XML parser context
1829 * @input: an XML parser input fragment (entity, XML fragment ...).
1830 *
1831 * xmlPushInput: switch to a new input stream which is stacked on top
1832 * of the previous one(s).
1833 */
1834void
1835xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1836 if (input == NULL) return;
1837
1838 if (xmlParserDebugEntities) {
1839 if ((ctxt->input != NULL) && (ctxt->input->filename))
1840 xmlGenericError(xmlGenericErrorContext,
1841 "%s(%d): ", ctxt->input->filename,
1842 ctxt->input->line);
1843 xmlGenericError(xmlGenericErrorContext,
1844 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1845 }
1846 inputPush(ctxt, input);
1847 GROW;
1848}
1849
1850/**
1851 * xmlParseCharRef:
1852 * @ctxt: an XML parser context
1853 *
1854 * parse Reference declarations
1855 *
1856 * [66] CharRef ::= '&#' [0-9]+ ';' |
1857 * '&#x' [0-9a-fA-F]+ ';'
1858 *
1859 * [ WFC: Legal Character ]
1860 * Characters referred to using character references must match the
1861 * production for Char.
1862 *
1863 * Returns the value parsed (as an int), 0 in case of error
1864 */
1865int
1866xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001867 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001868 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001869 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001870
Owen Taylor3473f882001-02-23 17:55:21 +00001871 /*
1872 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1873 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001874 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001875 (NXT(2) == 'x')) {
1876 SKIP(3);
1877 GROW;
1878 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001879 if (count++ > 20) {
1880 count = 0;
1881 GROW;
1882 }
1883 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001884 val = val * 16 + (CUR - '0');
1885 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1886 val = val * 16 + (CUR - 'a') + 10;
1887 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1888 val = val * 16 + (CUR - 'A') + 10;
1889 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001890 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001891 val = 0;
1892 break;
1893 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001894 if (val > 0x10FFFF)
1895 outofrange = val;
1896
Owen Taylor3473f882001-02-23 17:55:21 +00001897 NEXT;
1898 count++;
1899 }
1900 if (RAW == ';') {
1901 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001902 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001903 ctxt->nbChars ++;
1904 ctxt->input->cur++;
1905 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001906 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001907 SKIP(2);
1908 GROW;
1909 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001910 if (count++ > 20) {
1911 count = 0;
1912 GROW;
1913 }
1914 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001915 val = val * 10 + (CUR - '0');
1916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001917 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001918 val = 0;
1919 break;
1920 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001921 if (val > 0x10FFFF)
1922 outofrange = val;
1923
Owen Taylor3473f882001-02-23 17:55:21 +00001924 NEXT;
1925 count++;
1926 }
1927 if (RAW == ';') {
1928 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001929 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001930 ctxt->nbChars ++;
1931 ctxt->input->cur++;
1932 }
1933 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001934 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001935 }
1936
1937 /*
1938 * [ WFC: Legal Character ]
1939 * Characters referred to using character references must match the
1940 * production for Char.
1941 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001942 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001943 return(val);
1944 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001945 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1946 "xmlParseCharRef: invalid xmlChar value %d\n",
1947 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001948 }
1949 return(0);
1950}
1951
1952/**
1953 * xmlParseStringCharRef:
1954 * @ctxt: an XML parser context
1955 * @str: a pointer to an index in the string
1956 *
1957 * parse Reference declarations, variant parsing from a string rather
1958 * than an an input flow.
1959 *
1960 * [66] CharRef ::= '&#' [0-9]+ ';' |
1961 * '&#x' [0-9a-fA-F]+ ';'
1962 *
1963 * [ WFC: Legal Character ]
1964 * Characters referred to using character references must match the
1965 * production for Char.
1966 *
1967 * Returns the value parsed (as an int), 0 in case of error, str will be
1968 * updated to the current value of the index
1969 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001970static int
Owen Taylor3473f882001-02-23 17:55:21 +00001971xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1972 const xmlChar *ptr;
1973 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001974 unsigned int val = 0;
1975 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001976
1977 if ((str == NULL) || (*str == NULL)) return(0);
1978 ptr = *str;
1979 cur = *ptr;
1980 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1981 ptr += 3;
1982 cur = *ptr;
1983 while (cur != ';') { /* Non input consuming loop */
1984 if ((cur >= '0') && (cur <= '9'))
1985 val = val * 16 + (cur - '0');
1986 else if ((cur >= 'a') && (cur <= 'f'))
1987 val = val * 16 + (cur - 'a') + 10;
1988 else if ((cur >= 'A') && (cur <= 'F'))
1989 val = val * 16 + (cur - 'A') + 10;
1990 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001991 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001992 val = 0;
1993 break;
1994 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001995 if (val > 0x10FFFF)
1996 outofrange = val;
1997
Owen Taylor3473f882001-02-23 17:55:21 +00001998 ptr++;
1999 cur = *ptr;
2000 }
2001 if (cur == ';')
2002 ptr++;
2003 } else if ((cur == '&') && (ptr[1] == '#')){
2004 ptr += 2;
2005 cur = *ptr;
2006 while (cur != ';') { /* Non input consuming loops */
2007 if ((cur >= '0') && (cur <= '9'))
2008 val = val * 10 + (cur - '0');
2009 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 val = 0;
2012 break;
2013 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002014 if (val > 0x10FFFF)
2015 outofrange = val;
2016
Owen Taylor3473f882001-02-23 17:55:21 +00002017 ptr++;
2018 cur = *ptr;
2019 }
2020 if (cur == ';')
2021 ptr++;
2022 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002023 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002024 return(0);
2025 }
2026 *str = ptr;
2027
2028 /*
2029 * [ WFC: Legal Character ]
2030 * Characters referred to using character references must match the
2031 * production for Char.
2032 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002033 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002034 return(val);
2035 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002036 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2037 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2038 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002039 }
2040 return(0);
2041}
2042
2043/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002044 * xmlNewBlanksWrapperInputStream:
2045 * @ctxt: an XML parser context
2046 * @entity: an Entity pointer
2047 *
2048 * Create a new input stream for wrapping
2049 * blanks around a PEReference
2050 *
2051 * Returns the new input stream or NULL
2052 */
2053
2054static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2055
Daniel Veillardf4862f02002-09-10 11:13:43 +00002056static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002057xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2058 xmlParserInputPtr input;
2059 xmlChar *buffer;
2060 size_t length;
2061 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2063 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002064 return(NULL);
2065 }
2066 if (xmlParserDebugEntities)
2067 xmlGenericError(xmlGenericErrorContext,
2068 "new blanks wrapper for entity: %s\n", entity->name);
2069 input = xmlNewInputStream(ctxt);
2070 if (input == NULL) {
2071 return(NULL);
2072 }
2073 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002074 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002075 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002076 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002077 return(NULL);
2078 }
2079 buffer [0] = ' ';
2080 buffer [1] = '%';
2081 buffer [length-3] = ';';
2082 buffer [length-2] = ' ';
2083 buffer [length-1] = 0;
2084 memcpy(buffer + 2, entity->name, length - 5);
2085 input->free = deallocblankswrapper;
2086 input->base = buffer;
2087 input->cur = buffer;
2088 input->length = length;
2089 input->end = &buffer[length];
2090 return(input);
2091}
2092
2093/**
Owen Taylor3473f882001-02-23 17:55:21 +00002094 * xmlParserHandlePEReference:
2095 * @ctxt: the parser context
2096 *
2097 * [69] PEReference ::= '%' Name ';'
2098 *
2099 * [ WFC: No Recursion ]
2100 * A parsed entity must not contain a recursive
2101 * reference to itself, either directly or indirectly.
2102 *
2103 * [ WFC: Entity Declared ]
2104 * In a document without any DTD, a document with only an internal DTD
2105 * subset which contains no parameter entity references, or a document
2106 * with "standalone='yes'", ... ... The declaration of a parameter
2107 * entity must precede any reference to it...
2108 *
2109 * [ VC: Entity Declared ]
2110 * In a document with an external subset or external parameter entities
2111 * with "standalone='no'", ... ... The declaration of a parameter entity
2112 * must precede any reference to it...
2113 *
2114 * [ WFC: In DTD ]
2115 * Parameter-entity references may only appear in the DTD.
2116 * NOTE: misleading but this is handled.
2117 *
2118 * A PEReference may have been detected in the current input stream
2119 * the handling is done accordingly to
2120 * http://www.w3.org/TR/REC-xml#entproc
2121 * i.e.
2122 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002123 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002124 */
2125void
2126xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002127 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002128 xmlEntityPtr entity = NULL;
2129 xmlParserInputPtr input;
2130
Owen Taylor3473f882001-02-23 17:55:21 +00002131 if (RAW != '%') return;
2132 switch(ctxt->instate) {
2133 case XML_PARSER_CDATA_SECTION:
2134 return;
2135 case XML_PARSER_COMMENT:
2136 return;
2137 case XML_PARSER_START_TAG:
2138 return;
2139 case XML_PARSER_END_TAG:
2140 return;
2141 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002142 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002143 return;
2144 case XML_PARSER_PROLOG:
2145 case XML_PARSER_START:
2146 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002147 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return;
2149 case XML_PARSER_ENTITY_DECL:
2150 case XML_PARSER_CONTENT:
2151 case XML_PARSER_ATTRIBUTE_VALUE:
2152 case XML_PARSER_PI:
2153 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002154 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002155 /* we just ignore it there */
2156 return;
2157 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002158 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002159 return;
2160 case XML_PARSER_ENTITY_VALUE:
2161 /*
2162 * NOTE: in the case of entity values, we don't do the
2163 * substitution here since we need the literal
2164 * entity value to be able to save the internal
2165 * subset of the document.
2166 * This will be handled by xmlStringDecodeEntities
2167 */
2168 return;
2169 case XML_PARSER_DTD:
2170 /*
2171 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2172 * In the internal DTD subset, parameter-entity references
2173 * can occur only where markup declarations can occur, not
2174 * within markup declarations.
2175 * In that case this is handled in xmlParseMarkupDecl
2176 */
2177 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2178 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002179 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002180 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002181 break;
2182 case XML_PARSER_IGNORE:
2183 return;
2184 }
2185
2186 NEXT;
2187 name = xmlParseName(ctxt);
2188 if (xmlParserDebugEntities)
2189 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002190 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002191 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002192 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002193 } else {
2194 if (RAW == ';') {
2195 NEXT;
2196 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2197 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2198 if (entity == NULL) {
2199
2200 /*
2201 * [ WFC: Entity Declared ]
2202 * In a document without any DTD, a document with only an
2203 * internal DTD subset which contains no parameter entity
2204 * references, or a document with "standalone='yes'", ...
2205 * ... The declaration of a parameter entity must precede
2206 * any reference to it...
2207 */
2208 if ((ctxt->standalone == 1) ||
2209 ((ctxt->hasExternalSubset == 0) &&
2210 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002211 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002212 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002213 } else {
2214 /*
2215 * [ VC: Entity Declared ]
2216 * In a document with an external subset or external
2217 * parameter entities with "standalone='no'", ...
2218 * ... The declaration of a parameter entity must precede
2219 * any reference to it...
2220 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002221 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2222 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2223 "PEReference: %%%s; not found\n",
2224 name);
2225 } else
2226 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2227 "PEReference: %%%s; not found\n",
2228 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002229 ctxt->valid = 0;
2230 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002231 } else if (ctxt->input->free != deallocblankswrapper) {
2232 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2233 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002234 } else {
2235 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2236 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002237 xmlChar start[4];
2238 xmlCharEncoding enc;
2239
Owen Taylor3473f882001-02-23 17:55:21 +00002240 /*
2241 * handle the extra spaces added before and after
2242 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002243 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002244 */
2245 input = xmlNewEntityInputStream(ctxt, entity);
2246 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002247
2248 /*
2249 * Get the 4 first bytes and decode the charset
2250 * if enc != XML_CHAR_ENCODING_NONE
2251 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002252 * Note that, since we may have some non-UTF8
2253 * encoding (like UTF16, bug 135229), the 'length'
2254 * is not known, but we can calculate based upon
2255 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002256 */
2257 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002258 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002259 start[0] = RAW;
2260 start[1] = NXT(1);
2261 start[2] = NXT(2);
2262 start[3] = NXT(3);
2263 enc = xmlDetectCharEncoding(start, 4);
2264 if (enc != XML_CHAR_ENCODING_NONE) {
2265 xmlSwitchEncoding(ctxt, enc);
2266 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002267 }
2268
Owen Taylor3473f882001-02-23 17:55:21 +00002269 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002270 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2271 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002272 xmlParseTextDecl(ctxt);
2273 }
Owen Taylor3473f882001-02-23 17:55:21 +00002274 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002275 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2276 "PEReference: %s is not a parameter entity\n",
2277 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002278 }
2279 }
2280 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002281 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002282 }
Owen Taylor3473f882001-02-23 17:55:21 +00002283 }
2284}
2285
2286/*
2287 * Macro used to grow the current buffer.
2288 */
2289#define growBuffer(buffer) { \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002290 xmlChar *tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002291 buffer##_size *= 2; \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002292 tmp = (xmlChar *) \
Owen Taylor3473f882001-02-23 17:55:21 +00002293 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002294 if (tmp == NULL) goto mem_error; \
2295 buffer = tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002296}
2297
2298/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002299 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002300 * @ctxt: the parser context
2301 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002302 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002303 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2304 * @end: an end marker xmlChar, 0 if none
2305 * @end2: an end marker xmlChar, 0 if none
2306 * @end3: an end marker xmlChar, 0 if none
2307 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002308 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002309 *
2310 * [67] Reference ::= EntityRef | CharRef
2311 *
2312 * [69] PEReference ::= '%' Name ';'
2313 *
2314 * Returns A newly allocated string with the substitution done. The caller
2315 * must deallocate it !
2316 */
2317xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002318xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 xmlChar *buffer = NULL;
2321 int buffer_size = 0;
2322
2323 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002324 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlEntityPtr ent;
2326 int c,l;
2327 int nbchars = 0;
2328
Daniel Veillarda82b1822004-11-08 16:24:57 +00002329 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002330 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002331 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002332
2333 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002334 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002335 return(NULL);
2336 }
2337
2338 /*
2339 * allocate a translation buffer.
2340 */
2341 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002342 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002343 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002344
2345 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002346 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002347 * we are operating on already parsed values.
2348 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002349 if (str < last)
2350 c = CUR_SCHAR(str, l);
2351 else
2352 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002353 while ((c != 0) && (c != end) && /* non input consuming loop */
2354 (c != end2) && (c != end3)) {
2355
2356 if (c == 0) break;
2357 if ((c == '&') && (str[1] == '#')) {
2358 int val = xmlParseStringCharRef(ctxt, &str);
2359 if (val != 0) {
2360 COPY_BUF(0,buffer,nbchars,val);
2361 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002362 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2363 growBuffer(buffer);
2364 }
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2366 if (xmlParserDebugEntities)
2367 xmlGenericError(xmlGenericErrorContext,
2368 "String decoding Entity Reference: %.30s\n",
2369 str);
2370 ent = xmlParseStringEntityRef(ctxt, &str);
2371 if ((ent != NULL) &&
2372 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2373 if (ent->content != NULL) {
2374 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002375 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2376 growBuffer(buffer);
2377 }
Owen Taylor3473f882001-02-23 17:55:21 +00002378 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002379 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2380 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002381 }
2382 } else if ((ent != NULL) && (ent->content != NULL)) {
2383 xmlChar *rep;
2384
2385 ctxt->depth++;
2386 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2387 0, 0, 0);
2388 ctxt->depth--;
2389 if (rep != NULL) {
2390 current = rep;
2391 while (*current != 0) { /* non input consuming loop */
2392 buffer[nbchars++] = *current++;
2393 if (nbchars >
2394 buffer_size - XML_PARSER_BUFFER_SIZE) {
2395 growBuffer(buffer);
2396 }
2397 }
2398 xmlFree(rep);
2399 }
2400 } else if (ent != NULL) {
2401 int i = xmlStrlen(ent->name);
2402 const xmlChar *cur = ent->name;
2403
2404 buffer[nbchars++] = '&';
2405 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2406 growBuffer(buffer);
2407 }
2408 for (;i > 0;i--)
2409 buffer[nbchars++] = *cur++;
2410 buffer[nbchars++] = ';';
2411 }
2412 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2413 if (xmlParserDebugEntities)
2414 xmlGenericError(xmlGenericErrorContext,
2415 "String decoding PE Reference: %.30s\n", str);
2416 ent = xmlParseStringPEReference(ctxt, &str);
2417 if (ent != NULL) {
2418 xmlChar *rep;
2419
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002420 if (ent->content == NULL) {
2421 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2422 }
2423 }
Owen Taylor3473f882001-02-23 17:55:21 +00002424 ctxt->depth++;
2425 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2426 0, 0, 0);
2427 ctxt->depth--;
2428 if (rep != NULL) {
2429 current = rep;
2430 while (*current != 0) { /* non input consuming loop */
2431 buffer[nbchars++] = *current++;
2432 if (nbchars >
2433 buffer_size - XML_PARSER_BUFFER_SIZE) {
2434 growBuffer(buffer);
2435 }
2436 }
2437 xmlFree(rep);
2438 }
2439 }
2440 } else {
2441 COPY_BUF(l,buffer,nbchars,c);
2442 str += l;
2443 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2444 growBuffer(buffer);
2445 }
2446 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002447 if (str < last)
2448 c = CUR_SCHAR(str, l);
2449 else
2450 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002451 }
2452 buffer[nbchars++] = 0;
2453 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002454
2455mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002456 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002457 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002458}
2459
Daniel Veillarde57ec792003-09-10 10:50:59 +00002460/**
2461 * xmlStringDecodeEntities:
2462 * @ctxt: the parser context
2463 * @str: the input string
2464 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2465 * @end: an end marker xmlChar, 0 if none
2466 * @end2: an end marker xmlChar, 0 if none
2467 * @end3: an end marker xmlChar, 0 if none
2468 *
2469 * Takes a entity string content and process to do the adequate substitutions.
2470 *
2471 * [67] Reference ::= EntityRef | CharRef
2472 *
2473 * [69] PEReference ::= '%' Name ';'
2474 *
2475 * Returns A newly allocated string with the substitution done. The caller
2476 * must deallocate it !
2477 */
2478xmlChar *
2479xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2480 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002481 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002482 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2483 end, end2, end3));
2484}
Owen Taylor3473f882001-02-23 17:55:21 +00002485
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2491
2492/**
2493 * areBlanks:
2494 * @ctxt: an XML parser context
2495 * @str: a xmlChar *
2496 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002497 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002498 *
2499 * Is this a sequence of blank chars that one can ignore ?
2500 *
2501 * Returns 1 if ignorable 0 otherwise.
2502 */
2503
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002504static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2505 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002506 int i, ret;
2507 xmlNodePtr lastChild;
2508
Daniel Veillard05c13a22001-09-09 08:38:09 +00002509 /*
2510 * Don't spend time trying to differentiate them, the same callback is
2511 * used !
2512 */
2513 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002514 return(0);
2515
Owen Taylor3473f882001-02-23 17:55:21 +00002516 /*
2517 * Check for xml:space value.
2518 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002519 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2520 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002521 return(0);
2522
2523 /*
2524 * Check that the string is made of blanks
2525 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002526 if (blank_chars == 0) {
2527 for (i = 0;i < len;i++)
2528 if (!(IS_BLANK_CH(str[i]))) return(0);
2529 }
Owen Taylor3473f882001-02-23 17:55:21 +00002530
2531 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002532 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002533 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002534 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002535 if (ctxt->myDoc != NULL) {
2536 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2537 if (ret == 0) return(1);
2538 if (ret == 1) return(0);
2539 }
2540
2541 /*
2542 * Otherwise, heuristic :-\
2543 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002544 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 if ((ctxt->node->children == NULL) &&
2546 (RAW == '<') && (NXT(1) == '/')) return(0);
2547
2548 lastChild = xmlGetLastChild(ctxt->node);
2549 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002550 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2551 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 } else if (xmlNodeIsText(lastChild))
2553 return(0);
2554 else if ((ctxt->node->children != NULL) &&
2555 (xmlNodeIsText(ctxt->node->children)))
2556 return(0);
2557 return(1);
2558}
2559
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
2566
2567/**
2568 * xmlSplitQName:
2569 * @ctxt: an XML parser context
2570 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002571 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * parse an UTF8 encoded XML qualified name string
2574 *
2575 * [NS 5] QName ::= (Prefix ':')? LocalPart
2576 *
2577 * [NS 6] Prefix ::= NCName
2578 *
2579 * [NS 7] LocalPart ::= NCName
2580 *
2581 * Returns the local part, and prefix is updated
2582 * to get the Prefix if any.
2583 */
2584
2585xmlChar *
2586xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2587 xmlChar buf[XML_MAX_NAMELEN + 5];
2588 xmlChar *buffer = NULL;
2589 int len = 0;
2590 int max = XML_MAX_NAMELEN;
2591 xmlChar *ret = NULL;
2592 const xmlChar *cur = name;
2593 int c;
2594
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002595 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002596 *prefix = NULL;
2597
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002598 if (cur == NULL) return(NULL);
2599
Owen Taylor3473f882001-02-23 17:55:21 +00002600#ifndef XML_XML_NAMESPACE
2601 /* xml: prefix is not really a namespace */
2602 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2603 (cur[2] == 'l') && (cur[3] == ':'))
2604 return(xmlStrdup(name));
2605#endif
2606
Daniel Veillard597bc482003-07-24 16:08:28 +00002607 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002608 if (cur[0] == ':')
2609 return(xmlStrdup(name));
2610
2611 c = *cur++;
2612 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2613 buf[len++] = c;
2614 c = *cur++;
2615 }
2616 if (len >= max) {
2617 /*
2618 * Okay someone managed to make a huge name, so he's ready to pay
2619 * for the processing speed.
2620 */
2621 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002622
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002623 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002624 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002625 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002626 return(NULL);
2627 }
2628 memcpy(buffer, buf, len);
2629 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2630 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002631 xmlChar *tmp;
2632
Owen Taylor3473f882001-02-23 17:55:21 +00002633 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002634 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002635 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002636 if (tmp == NULL) {
2637 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002638 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002639 return(NULL);
2640 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002641 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002642 }
2643 buffer[len++] = c;
2644 c = *cur++;
2645 }
2646 buffer[len] = 0;
2647 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002648
Daniel Veillard597bc482003-07-24 16:08:28 +00002649 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002650 if (buffer != NULL)
2651 xmlFree(buffer);
2652 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002653 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002654 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002655
Owen Taylor3473f882001-02-23 17:55:21 +00002656 if (buffer == NULL)
2657 ret = xmlStrndup(buf, len);
2658 else {
2659 ret = buffer;
2660 buffer = NULL;
2661 max = XML_MAX_NAMELEN;
2662 }
2663
2664
2665 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002666 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002667 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002668 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002669 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002670 }
Owen Taylor3473f882001-02-23 17:55:21 +00002671 len = 0;
2672
Daniel Veillardbb284f42002-10-16 18:02:47 +00002673 /*
2674 * Check that the first character is proper to start
2675 * a new name
2676 */
2677 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2678 ((c >= 0x41) && (c <= 0x5A)) ||
2679 (c == '_') || (c == ':'))) {
2680 int l;
2681 int first = CUR_SCHAR(cur, l);
2682
2683 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002684 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002685 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002686 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002687 }
2688 }
2689 cur++;
2690
Owen Taylor3473f882001-02-23 17:55:21 +00002691 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2692 buf[len++] = c;
2693 c = *cur++;
2694 }
2695 if (len >= max) {
2696 /*
2697 * Okay someone managed to make a huge name, so he's ready to pay
2698 * for the processing speed.
2699 */
2700 max = len * 2;
2701
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002702 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002703 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002704 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 return(NULL);
2706 }
2707 memcpy(buffer, buf, len);
2708 while (c != 0) { /* tested bigname2.xml */
2709 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002710 xmlChar *tmp;
2711
Owen Taylor3473f882001-02-23 17:55:21 +00002712 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002713 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002714 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002715 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002716 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002717 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002718 return(NULL);
2719 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002720 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 }
2722 buffer[len++] = c;
2723 c = *cur++;
2724 }
2725 buffer[len] = 0;
2726 }
2727
2728 if (buffer == NULL)
2729 ret = xmlStrndup(buf, len);
2730 else {
2731 ret = buffer;
2732 }
2733 }
2734
2735 return(ret);
2736}
2737
/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/
2744
/*
 * Forward declarations of static helpers used before their definition.
 * xmlParseNameComplex() is the fallback called by xmlParseName() when its
 * ASCII fast path does not apply.
 * NOTE(review): xmlParseAttValueInternal() is defined outside the part of
 * the file reviewed here; see its definition for the meaning of @len,
 * @alloc and @normalize.
 */
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002748
Owen Taylor3473f882001-02-23 17:55:21 +00002749/**
2750 * xmlParseName:
2751 * @ctxt: an XML parser context
2752 *
2753 * parse an XML name.
2754 *
2755 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2756 * CombiningChar | Extender
2757 *
2758 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2759 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002760 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002761 *
2762 * Returns the Name parsed or NULL
2763 */
2764
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002765const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002766xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002767 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002768 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002769 int count = 0;
2770
2771 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772
2773 /*
2774 * Accelerator for simple ASCII names
2775 */
2776 in = ctxt->input->cur;
2777 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2778 ((*in >= 0x41) && (*in <= 0x5A)) ||
2779 (*in == '_') || (*in == ':')) {
2780 in++;
2781 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2782 ((*in >= 0x41) && (*in <= 0x5A)) ||
2783 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002784 (*in == '_') || (*in == '-') ||
2785 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002786 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002787 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002788 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002789 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002790 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002791 ctxt->nbChars += count;
2792 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002793 if (ret == NULL)
2794 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002795 return(ret);
2796 }
2797 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002798 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002799}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002800
Daniel Veillard46de64e2002-05-29 08:21:33 +00002801/**
2802 * xmlParseNameAndCompare:
2803 * @ctxt: an XML parser context
2804 *
2805 * parse an XML name and compares for match
2806 * (specialized for endtag parsing)
2807 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002808 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2809 * and the name for mismatch
2810 */
2811
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002812static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002813xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002814 register const xmlChar *cmp = other;
2815 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002816 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002817
2818 GROW;
2819
2820 in = ctxt->input->cur;
2821 while (*in != 0 && *in == *cmp) {
2822 ++in;
2823 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002824 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002825 }
William M. Brack76e95df2003-10-18 16:20:14 +00002826 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002827 /* success */
2828 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002829 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002830 }
2831 /* failure (or end of input buffer), check with full function */
2832 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002833 /* strings coming from the dictionnary direct compare possible */
2834 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002835 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002836 }
2837 return ret;
2838}
2839
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002840static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002841xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002842 int len = 0, l;
2843 int c;
2844 int count = 0;
2845
2846 /*
2847 * Handler for more complex cases
2848 */
2849 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 c = CUR_CHAR(l);
2851 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2852 (!IS_LETTER(c) && (c != '_') &&
2853 (c != ':'))) {
2854 return(NULL);
2855 }
2856
2857 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002858 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002859 (c == '.') || (c == '-') ||
2860 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002861 (IS_COMBINING(c)) ||
2862 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002863 if (count++ > 100) {
2864 count = 0;
2865 GROW;
2866 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002867 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002868 NEXTL(l);
2869 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002870 }
Daniel Veillard96688262005-08-23 18:14:12 +00002871 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2872 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002873 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002874}
2875
2876/**
2877 * xmlParseStringName:
2878 * @ctxt: an XML parser context
2879 * @str: a pointer to the string pointer (IN/OUT)
2880 *
2881 * parse an XML name.
2882 *
2883 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2884 * CombiningChar | Extender
2885 *
2886 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2887 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002888 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002889 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002890 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002891 * is updated to the current location in the string.
2892 */
2893
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002894static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002895xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2896 xmlChar buf[XML_MAX_NAMELEN + 5];
2897 const xmlChar *cur = *str;
2898 int len = 0, l;
2899 int c;
2900
2901 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002902 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002903 (c != ':')) {
2904 return(NULL);
2905 }
2906
William M. Brack871611b2003-10-18 04:53:14 +00002907 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002908 (c == '.') || (c == '-') ||
2909 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002910 (IS_COMBINING(c)) ||
2911 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002912 COPY_BUF(l,buf,len,c);
2913 cur += l;
2914 c = CUR_SCHAR(cur, l);
2915 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2916 /*
2917 * Okay someone managed to make a huge name, so he's ready to pay
2918 * for the processing speed.
2919 */
2920 xmlChar *buffer;
2921 int max = len * 2;
2922
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002923 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002924 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002925 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002926 return(NULL);
2927 }
2928 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002929 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002930 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002931 (c == '.') || (c == '-') ||
2932 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002933 (IS_COMBINING(c)) ||
2934 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002935 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002936 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002937 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002938 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002939 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002940 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002941 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002942 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002943 return(NULL);
2944 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002945 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002946 }
2947 COPY_BUF(l,buffer,len,c);
2948 cur += l;
2949 c = CUR_SCHAR(cur, l);
2950 }
2951 buffer[len] = 0;
2952 *str = cur;
2953 return(buffer);
2954 }
2955 }
2956 *str = cur;
2957 return(xmlStrndup(buf, len));
2958}
2959
2960/**
2961 * xmlParseNmtoken:
2962 * @ctxt: an XML parser context
2963 *
2964 * parse an XML Nmtoken.
2965 *
2966 * [7] Nmtoken ::= (NameChar)+
2967 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002968 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002969 *
2970 * Returns the Nmtoken parsed or NULL
2971 */
2972
2973xmlChar *
2974xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2975 xmlChar buf[XML_MAX_NAMELEN + 5];
2976 int len = 0, l;
2977 int c;
2978 int count = 0;
2979
2980 GROW;
2981 c = CUR_CHAR(l);
2982
William M. Brack871611b2003-10-18 04:53:14 +00002983 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002984 (c == '.') || (c == '-') ||
2985 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002986 (IS_COMBINING(c)) ||
2987 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002988 if (count++ > 100) {
2989 count = 0;
2990 GROW;
2991 }
2992 COPY_BUF(l,buf,len,c);
2993 NEXTL(l);
2994 c = CUR_CHAR(l);
2995 if (len >= XML_MAX_NAMELEN) {
2996 /*
2997 * Okay someone managed to make a huge token, so he's ready to pay
2998 * for the processing speed.
2999 */
3000 xmlChar *buffer;
3001 int max = len * 2;
3002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00003009 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00003010 (c == '.') || (c == '-') ||
3011 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00003012 (IS_COMBINING(c)) ||
3013 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00003014 if (count++ > 100) {
3015 count = 0;
3016 GROW;
3017 }
3018 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 xmlChar *tmp;
3020
Owen Taylor3473f882001-02-23 17:55:21 +00003021 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003022 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003023 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003024 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003025 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003026 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003029 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003030 }
3031 COPY_BUF(l,buffer,len,c);
3032 NEXTL(l);
3033 c = CUR_CHAR(l);
3034 }
3035 buffer[len] = 0;
3036 return(buffer);
3037 }
3038 }
3039 if (len == 0)
3040 return(NULL);
3041 return(xmlStrndup(buf, len));
3042}
3043
3044/**
3045 * xmlParseEntityValue:
3046 * @ctxt: an XML parser context
3047 * @orig: if non-NULL store a copy of the original entity value
3048 *
3049 * parse a value for ENTITY declarations
3050 *
3051 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3052 * "'" ([^%&'] | PEReference | Reference)* "'"
3053 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003054 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003055 */
3056
3057xmlChar *
3058xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3059 xmlChar *buf = NULL;
3060 int len = 0;
3061 int size = XML_PARSER_BUFFER_SIZE;
3062 int c, l;
3063 xmlChar stop;
3064 xmlChar *ret = NULL;
3065 const xmlChar *cur = NULL;
3066 xmlParserInputPtr input;
3067
3068 if (RAW == '"') stop = '"';
3069 else if (RAW == '\'') stop = '\'';
3070 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003071 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003072 return(NULL);
3073 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003074 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003075 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003076 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003077 return(NULL);
3078 }
3079
3080 /*
3081 * The content of the entity definition is copied in a buffer.
3082 */
3083
3084 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3085 input = ctxt->input;
3086 GROW;
3087 NEXT;
3088 c = CUR_CHAR(l);
3089 /*
3090 * NOTE: 4.4.5 Included in Literal
3091 * When a parameter entity reference appears in a literal entity
3092 * value, ... a single or double quote character in the replacement
3093 * text is always treated as a normal data character and will not
3094 * terminate the literal.
3095 * In practice it means we stop the loop only when back at parsing
3096 * the initial entity and the quote is found
3097 */
William M. Brack871611b2003-10-18 04:53:14 +00003098 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003099 (ctxt->input != input))) {
3100 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003101 xmlChar *tmp;
3102
Owen Taylor3473f882001-02-23 17:55:21 +00003103 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003104 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3105 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003106 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003107 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003108 return(NULL);
3109 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003110 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003111 }
3112 COPY_BUF(l,buf,len,c);
3113 NEXTL(l);
3114 /*
3115 * Pop-up of finished entities.
3116 */
3117 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3118 xmlPopInput(ctxt);
3119
3120 GROW;
3121 c = CUR_CHAR(l);
3122 if (c == 0) {
3123 GROW;
3124 c = CUR_CHAR(l);
3125 }
3126 }
3127 buf[len] = 0;
3128
3129 /*
3130 * Raise problem w.r.t. '&' and '%' being used in non-entities
3131 * reference constructs. Note Charref will be handled in
3132 * xmlStringDecodeEntities()
3133 */
3134 cur = buf;
3135 while (*cur != 0) { /* non input consuming */
3136 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3137 xmlChar *name;
3138 xmlChar tmp = *cur;
3139
3140 cur++;
3141 name = xmlParseStringName(ctxt, &cur);
3142 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003143 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003144 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003145 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003146 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003147 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3148 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003149 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003150 }
3151 if (name != NULL)
3152 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003153 if (*cur == 0)
3154 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003155 }
3156 cur++;
3157 }
3158
3159 /*
3160 * Then PEReference entities are substituted.
3161 */
3162 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003163 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003164 xmlFree(buf);
3165 } else {
3166 NEXT;
3167 /*
3168 * NOTE: 4.4.7 Bypassed
3169 * When a general entity reference appears in the EntityValue in
3170 * an entity declaration, it is bypassed and left as is.
3171 * so XML_SUBSTITUTE_REF is not set here.
3172 */
3173 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3174 0, 0, 0);
3175 if (orig != NULL)
3176 *orig = buf;
3177 else
3178 xmlFree(buf);
3179 }
3180
3181 return(ret);
3182}
3183
3184/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003185 * xmlParseAttValueComplex:
3186 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003187 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003188 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003189 *
3190 * parse a value for an attribute, this is the fallback function
3191 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003192 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003193 *
3194 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3195 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003196static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003197xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003198 xmlChar limit = 0;
3199 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003200 int len = 0;
3201 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003202 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003203 xmlChar *current = NULL;
3204 xmlEntityPtr ent;
3205
Owen Taylor3473f882001-02-23 17:55:21 +00003206 if (NXT(0) == '"') {
3207 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3208 limit = '"';
3209 NEXT;
3210 } else if (NXT(0) == '\'') {
3211 limit = '\'';
3212 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3213 NEXT;
3214 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003215 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(NULL);
3217 }
3218
3219 /*
3220 * allocate a translation buffer.
3221 */
3222 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003223 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003224 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003225
3226 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003227 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003228 */
3229 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003230 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003231 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003232 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003233 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003234 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003235 if (NXT(1) == '#') {
3236 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003237
Owen Taylor3473f882001-02-23 17:55:21 +00003238 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003239 if (ctxt->replaceEntities) {
3240 if (len > buf_size - 10) {
3241 growBuffer(buf);
3242 }
3243 buf[len++] = '&';
3244 } else {
3245 /*
3246 * The reparsing will be done in xmlStringGetNodeList()
3247 * called by the attribute() function in SAX.c
3248 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003249 if (len > buf_size - 10) {
3250 growBuffer(buf);
3251 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003252 buf[len++] = '&';
3253 buf[len++] = '#';
3254 buf[len++] = '3';
3255 buf[len++] = '8';
3256 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003257 }
3258 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003259 if (len > buf_size - 10) {
3260 growBuffer(buf);
3261 }
Owen Taylor3473f882001-02-23 17:55:21 +00003262 len += xmlCopyChar(0, &buf[len], val);
3263 }
3264 } else {
3265 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003266 if ((ent != NULL) &&
3267 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3268 if (len > buf_size - 10) {
3269 growBuffer(buf);
3270 }
3271 if ((ctxt->replaceEntities == 0) &&
3272 (ent->content[0] == '&')) {
3273 buf[len++] = '&';
3274 buf[len++] = '#';
3275 buf[len++] = '3';
3276 buf[len++] = '8';
3277 buf[len++] = ';';
3278 } else {
3279 buf[len++] = ent->content[0];
3280 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003281 } else if ((ent != NULL) &&
3282 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003283 xmlChar *rep;
3284
3285 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3286 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003287 XML_SUBSTITUTE_REF,
3288 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003289 if (rep != NULL) {
3290 current = rep;
3291 while (*current != 0) { /* non input consuming */
3292 buf[len++] = *current++;
3293 if (len > buf_size - 10) {
3294 growBuffer(buf);
3295 }
3296 }
3297 xmlFree(rep);
3298 }
3299 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003300 if (len > buf_size - 10) {
3301 growBuffer(buf);
3302 }
Owen Taylor3473f882001-02-23 17:55:21 +00003303 if (ent->content != NULL)
3304 buf[len++] = ent->content[0];
3305 }
3306 } else if (ent != NULL) {
3307 int i = xmlStrlen(ent->name);
3308 const xmlChar *cur = ent->name;
3309
3310 /*
3311 * This may look absurd but is needed to detect
3312 * entities problems
3313 */
3314 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3315 (ent->content != NULL)) {
3316 xmlChar *rep;
3317 rep = xmlStringDecodeEntities(ctxt, ent->content,
3318 XML_SUBSTITUTE_REF, 0, 0, 0);
3319 if (rep != NULL)
3320 xmlFree(rep);
3321 }
3322
3323 /*
3324 * Just output the reference
3325 */
3326 buf[len++] = '&';
3327 if (len > buf_size - i - 10) {
3328 growBuffer(buf);
3329 }
3330 for (;i > 0;i--)
3331 buf[len++] = *cur++;
3332 buf[len++] = ';';
3333 }
3334 }
3335 } else {
3336 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003337 if ((len != 0) || (!normalize)) {
3338 if ((!normalize) || (!in_space)) {
3339 COPY_BUF(l,buf,len,0x20);
3340 if (len > buf_size - 10) {
3341 growBuffer(buf);
3342 }
3343 }
3344 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003345 }
3346 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003347 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003348 COPY_BUF(l,buf,len,c);
3349 if (len > buf_size - 10) {
3350 growBuffer(buf);
3351 }
3352 }
3353 NEXTL(l);
3354 }
3355 GROW;
3356 c = CUR_CHAR(l);
3357 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003358 if ((in_space) && (normalize)) {
3359 while (buf[len - 1] == 0x20) len--;
3360 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003361 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003363 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003364 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003365 if ((c != 0) && (!IS_CHAR(c))) {
3366 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3367 "invalid character in attribute value\n");
3368 } else {
3369 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3370 "AttValue: ' expected\n");
3371 }
Owen Taylor3473f882001-02-23 17:55:21 +00003372 } else
3373 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003374 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003375 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003376
3377mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003378 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003379 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003380}
3381
3382/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003383 * xmlParseAttValue:
3384 * @ctxt: an XML parser context
3385 *
3386 * parse a value for an attribute
3387 * Note: the parser won't do substitution of entities here, this
3388 * will be handled later in xmlStringGetNodeList
3389 *
3390 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3391 * "'" ([^<&'] | Reference)* "'"
3392 *
3393 * 3.3.3 Attribute-Value Normalization:
3394 * Before the value of an attribute is passed to the application or
3395 * checked for validity, the XML processor must normalize it as follows:
3396 * - a character reference is processed by appending the referenced
3397 * character to the attribute value
3398 * - an entity reference is processed by recursively processing the
3399 * replacement text of the entity
3400 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3401 * appending #x20 to the normalized value, except that only a single
3402 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3403 * parsed entity or the literal entity value of an internal parsed entity
3404 * - other characters are processed by appending them to the normalized value
3405 * If the declared value is not CDATA, then the XML processor must further
3406 * process the normalized attribute value by discarding any leading and
3407 * trailing space (#x20) characters, and by replacing sequences of space
3408 * (#x20) characters by a single space (#x20) character.
3409 * All attributes for which no declaration has been read should be treated
3410 * by a non-validating parser as if declared CDATA.
3411 *
3412 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3413 */
3414
3415
3416xmlChar *
3417xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003418 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003419 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003420}
3421
3422/**
Owen Taylor3473f882001-02-23 17:55:21 +00003423 * xmlParseSystemLiteral:
3424 * @ctxt: an XML parser context
3425 *
3426 * parse an XML Literal
3427 *
3428 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3429 *
3430 * Returns the SystemLiteral parsed or NULL
3431 */
3432
3433xmlChar *
3434xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3435 xmlChar *buf = NULL;
3436 int len = 0;
3437 int size = XML_PARSER_BUFFER_SIZE;
3438 int cur, l;
3439 xmlChar stop;
3440 int state = ctxt->instate;
3441 int count = 0;
3442
3443 SHRINK;
3444 if (RAW == '"') {
3445 NEXT;
3446 stop = '"';
3447 } else if (RAW == '\'') {
3448 NEXT;
3449 stop = '\'';
3450 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003451 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003452 return(NULL);
3453 }
3454
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003455 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003456 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003457 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003458 return(NULL);
3459 }
3460 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3461 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003462 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003463 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003464 xmlChar *tmp;
3465
Owen Taylor3473f882001-02-23 17:55:21 +00003466 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003467 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3468 if (tmp == NULL) {
3469 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003470 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 ctxt->instate = (xmlParserInputState) state;
3472 return(NULL);
3473 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003474 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 }
3476 count++;
3477 if (count > 50) {
3478 GROW;
3479 count = 0;
3480 }
3481 COPY_BUF(l,buf,len,cur);
3482 NEXTL(l);
3483 cur = CUR_CHAR(l);
3484 if (cur == 0) {
3485 GROW;
3486 SHRINK;
3487 cur = CUR_CHAR(l);
3488 }
3489 }
3490 buf[len] = 0;
3491 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003492 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003493 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003494 } else {
3495 NEXT;
3496 }
3497 return(buf);
3498}
3499
3500/**
3501 * xmlParsePubidLiteral:
3502 * @ctxt: an XML parser context
3503 *
3504 * parse an XML public literal
3505 *
3506 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3507 *
3508 * Returns the PubidLiteral parsed or NULL.
3509 */
3510
3511xmlChar *
3512xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3513 xmlChar *buf = NULL;
3514 int len = 0;
3515 int size = XML_PARSER_BUFFER_SIZE;
3516 xmlChar cur;
3517 xmlChar stop;
3518 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003519 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003520
3521 SHRINK;
3522 if (RAW == '"') {
3523 NEXT;
3524 stop = '"';
3525 } else if (RAW == '\'') {
3526 NEXT;
3527 stop = '\'';
3528 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003529 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003530 return(NULL);
3531 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003532 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003533 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003534 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003535 return(NULL);
3536 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003537 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003538 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003539 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003540 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003541 xmlChar *tmp;
3542
Owen Taylor3473f882001-02-23 17:55:21 +00003543 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003544 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3545 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003546 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003547 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003548 return(NULL);
3549 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003550 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003551 }
3552 buf[len++] = cur;
3553 count++;
3554 if (count > 50) {
3555 GROW;
3556 count = 0;
3557 }
3558 NEXT;
3559 cur = CUR;
3560 if (cur == 0) {
3561 GROW;
3562 SHRINK;
3563 cur = CUR;
3564 }
3565 }
3566 buf[len] = 0;
3567 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003568 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 } else {
3570 NEXT;
3571 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003572 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003573 return(buf);
3574}
3575
Daniel Veillard48b2f892001-02-25 16:11:03 +00003576void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003577
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. It is indexed by a byte value: a non-zero entry lets the
 * scanning loop step over that byte, a zero entry stops it.
 * Zero entries: the control characters except 0x9 (so both CR and LF
 * stop the loop), '&' (0x26), '<' (0x3C), ']' (0x5D) and every
 * non-ASCII byte (0x80 and above).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3615
Owen Taylor3473f882001-02-23 17:55:21 +00003616/**
3617 * xmlParseCharData:
3618 * @ctxt: an XML parser context
3619 * @cdata: int indicating whether we are within a CDATA section
3620 *
3621 * parse a CharData section.
3622 * if we are within a CDATA section ']]>' marks an end of section.
3623 *
3624 * The right angle bracket (>) may be represented using the string "&gt;",
3625 * and must, for compatibility, be escaped using "&gt;" or a character
3626 * reference when it appears in the string "]]>" in content, when that
3627 * string is not marking the end of a CDATA section.
3628 *
3629 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3630 */
3631
3632void
3633xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003634 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003635 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003636 int line = ctxt->input->line;
3637 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003638 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003639
3640 SHRINK;
3641 GROW;
3642 /*
3643 * Accelerated common case where input don't need to be
3644 * modified before passing it to the handler.
3645 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003646 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003647 in = ctxt->input->cur;
3648 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003649get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003650 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003651 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003652 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003653 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003654 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003655 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003656 goto get_more_space;
3657 }
3658 if (*in == '<') {
3659 nbchar = in - ctxt->input->cur;
3660 if (nbchar > 0) {
3661 const xmlChar *tmp = ctxt->input->cur;
3662 ctxt->input->cur = in;
3663
Daniel Veillard34099b42004-11-04 17:34:35 +00003664 if ((ctxt->sax != NULL) &&
3665 (ctxt->sax->ignorableWhitespace !=
3666 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003667 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003668 if (ctxt->sax->ignorableWhitespace != NULL)
3669 ctxt->sax->ignorableWhitespace(ctxt->userData,
3670 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003671 } else {
3672 if (ctxt->sax->characters != NULL)
3673 ctxt->sax->characters(ctxt->userData,
3674 tmp, nbchar);
3675 if (*ctxt->space == -1)
3676 *ctxt->space = -2;
3677 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003678 } else if ((ctxt->sax != NULL) &&
3679 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003680 ctxt->sax->characters(ctxt->userData,
3681 tmp, nbchar);
3682 }
3683 }
3684 return;
3685 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003686
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003687get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003688 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003689 while (test_char_data[*in]) {
3690 in++;
3691 ccol++;
3692 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003693 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003694 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003695 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003696 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003697 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003698 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003699 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003700 }
3701 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003702 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003703 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003704 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003705 return;
3706 }
3707 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003708 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003709 goto get_more;
3710 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003711 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003712 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003713 if ((ctxt->sax != NULL) &&
3714 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003715 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003716 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003717 const xmlChar *tmp = ctxt->input->cur;
3718 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003719
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003720 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003721 if (ctxt->sax->ignorableWhitespace != NULL)
3722 ctxt->sax->ignorableWhitespace(ctxt->userData,
3723 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003724 } else {
3725 if (ctxt->sax->characters != NULL)
3726 ctxt->sax->characters(ctxt->userData,
3727 tmp, nbchar);
3728 if (*ctxt->space == -1)
3729 *ctxt->space = -2;
3730 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003731 line = ctxt->input->line;
3732 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003733 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003734 if (ctxt->sax->characters != NULL)
3735 ctxt->sax->characters(ctxt->userData,
3736 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003737 line = ctxt->input->line;
3738 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003739 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003740 }
3741 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003742 if (*in == 0xD) {
3743 in++;
3744 if (*in == 0xA) {
3745 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003746 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003747 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003748 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003749 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003750 in--;
3751 }
3752 if (*in == '<') {
3753 return;
3754 }
3755 if (*in == '&') {
3756 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003757 }
3758 SHRINK;
3759 GROW;
3760 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003761 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003762 nbchar = 0;
3763 }
Daniel Veillard50582112001-03-26 22:52:16 +00003764 ctxt->input->line = line;
3765 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003766 xmlParseCharDataComplex(ctxt, cdata);
3767}
3768
Daniel Veillard01c13b52002-12-10 15:19:08 +00003769/**
3770 * xmlParseCharDataComplex:
3771 * @ctxt: an XML parser context
3772 * @cdata: int indicating whether we are within a CDATA section
3773 *
3774 * parse a CharData section.this is the fallback function
3775 * of xmlParseCharData() when the parsing requires handling
3776 * of non-ASCII characters.
3777 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003778void
3779xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003780 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3781 int nbchar = 0;
3782 int cur, l;
3783 int count = 0;
3784
3785 SHRINK;
3786 GROW;
3787 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003788 while ((cur != '<') && /* checked */
3789 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003790 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003791 if ((cur == ']') && (NXT(1) == ']') &&
3792 (NXT(2) == '>')) {
3793 if (cdata) break;
3794 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003795 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003796 }
3797 }
3798 COPY_BUF(l,buf,nbchar,cur);
3799 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003800 buf[nbchar] = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003803 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003804 */
3805 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003806 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003807 if (ctxt->sax->ignorableWhitespace != NULL)
3808 ctxt->sax->ignorableWhitespace(ctxt->userData,
3809 buf, nbchar);
3810 } else {
3811 if (ctxt->sax->characters != NULL)
3812 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003813 if ((ctxt->sax->characters !=
3814 ctxt->sax->ignorableWhitespace) &&
3815 (*ctxt->space == -1))
3816 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003817 }
3818 }
3819 nbchar = 0;
3820 }
3821 count++;
3822 if (count > 50) {
3823 GROW;
3824 count = 0;
3825 }
3826 NEXTL(l);
3827 cur = CUR_CHAR(l);
3828 }
3829 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003830 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003832 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003833 */
3834 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003835 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003836 if (ctxt->sax->ignorableWhitespace != NULL)
3837 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3838 } else {
3839 if (ctxt->sax->characters != NULL)
3840 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003841 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3842 (*ctxt->space == -1))
3843 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003844 }
3845 }
3846 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003847 if ((cur != 0) && (!IS_CHAR(cur))) {
3848 /* Generate the error and skip the offending character */
3849 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3850 "PCDATA invalid Char value %d\n",
3851 cur);
3852 NEXTL(l);
3853 }
Owen Taylor3473f882001-02-23 17:55:21 +00003854}
3855
3856/**
3857 * xmlParseExternalID:
3858 * @ctxt: an XML parser context
3859 * @publicID: a xmlChar** receiving PubidLiteral
3860 * @strict: indicate whether we should restrict parsing to only
3861 * production [75], see NOTE below
3862 *
3863 * Parse an External ID or a Public ID
3864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003865 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003866 * 'PUBLIC' S PubidLiteral S SystemLiteral
3867 *
3868 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3869 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3870 *
3871 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3872 *
3873 * Returns the function returns SystemLiteral and in the second
3874 * case publicID receives PubidLiteral, is strict is off
3875 * it is possible to return NULL and have publicID set.
3876 */
3877
3878xmlChar *
3879xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3880 xmlChar *URI = NULL;
3881
3882 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003883
3884 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003885 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003886 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003887 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003888 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3889 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003890 }
3891 SKIP_BLANKS;
3892 URI = xmlParseSystemLiteral(ctxt);
3893 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003894 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003895 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003896 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003897 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003898 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003899 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003900 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003901 }
3902 SKIP_BLANKS;
3903 *publicID = xmlParsePubidLiteral(ctxt);
3904 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003905 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003906 }
3907 if (strict) {
3908 /*
3909 * We don't handle [83] so "S SystemLiteral" is required.
3910 */
William M. Brack76e95df2003-10-18 16:20:14 +00003911 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003912 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003913 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003914 }
3915 } else {
3916 /*
3917 * We handle [83] so we return immediately, if
3918 * "S SystemLiteral" is not detected. From a purely parsing
3919 * point of view that's a nice mess.
3920 */
3921 const xmlChar *ptr;
3922 GROW;
3923
3924 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003925 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003926
William M. Brack76e95df2003-10-18 16:20:14 +00003927 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003928 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3929 }
3930 SKIP_BLANKS;
3931 URI = xmlParseSystemLiteral(ctxt);
3932 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003933 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003934 }
3935 }
3936 return(URI);
3937}
3938
3939/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003940 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003941 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003942 * @buf: the already parsed part of the buffer
3943 * @len: number of bytes filles in the buffer
3944 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003945 *
3946 * Skip an XML (SGML) comment <!-- .... -->
3947 * The spec says that "For compatibility, the string "--" (double-hyphen)
3948 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003949 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003950 *
3951 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3952 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003953static void
3954xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003955 int q, ql;
3956 int r, rl;
3957 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003958 xmlParserInputPtr input = ctxt->input;
3959 int count = 0;
3960
Owen Taylor3473f882001-02-23 17:55:21 +00003961 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003962 len = 0;
3963 size = XML_PARSER_BUFFER_SIZE;
3964 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3965 if (buf == NULL) {
3966 xmlErrMemory(ctxt, NULL);
3967 return;
3968 }
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003970 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003971 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003972 if (q == 0)
3973 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003974 if (!IS_CHAR(q)) {
3975 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3976 "xmlParseComment: invalid xmlChar value %d\n",
3977 q);
3978 xmlFree (buf);
3979 return;
3980 }
Owen Taylor3473f882001-02-23 17:55:21 +00003981 NEXTL(ql);
3982 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003983 if (r == 0)
3984 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003985 if (!IS_CHAR(r)) {
3986 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3987 "xmlParseComment: invalid xmlChar value %d\n",
3988 q);
3989 xmlFree (buf);
3990 return;
3991 }
Owen Taylor3473f882001-02-23 17:55:21 +00003992 NEXTL(rl);
3993 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003994 if (cur == 0)
3995 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003996 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003997 ((cur != '>') ||
3998 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003999 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004000 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004001 }
4002 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004003 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004004 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004005 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4006 if (new_buf == NULL) {
4007 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004008 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004009 return;
4010 }
William M. Bracka3215c72004-07-31 16:24:01 +00004011 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004012 }
4013 COPY_BUF(ql,buf,len,q);
4014 q = r;
4015 ql = rl;
4016 r = cur;
4017 rl = l;
4018
4019 count++;
4020 if (count > 50) {
4021 GROW;
4022 count = 0;
4023 }
4024 NEXTL(l);
4025 cur = CUR_CHAR(l);
4026 if (cur == 0) {
4027 SHRINK;
4028 GROW;
4029 cur = CUR_CHAR(l);
4030 }
4031 }
4032 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004033 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004034 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004035 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004036 } else if (!IS_CHAR(cur)) {
4037 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4038 "xmlParseComment: invalid xmlChar value %d\n",
4039 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004040 } else {
4041 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004042 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4043 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004044 }
4045 NEXT;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4047 (!ctxt->disableSAX))
4048 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 }
Daniel Veillardda629342007-08-01 07:49:06 +00004050 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004051 return;
4052not_terminated:
4053 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4054 "Comment not terminated\n", NULL);
4055 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004056 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004057}
Daniel Veillardda629342007-08-01 07:49:06 +00004058
Daniel Veillard4c778d82005-01-23 17:37:44 +00004059/**
4060 * xmlParseComment:
4061 * @ctxt: an XML parser context
4062 *
4063 * Skip an XML (SGML) comment <!-- .... -->
4064 * The spec says that "For compatibility, the string "--" (double-hyphen)
4065 * must not occur within comments. "
4066 *
4067 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4068 */
4069void
4070xmlParseComment(xmlParserCtxtPtr ctxt) {
4071 xmlChar *buf = NULL;
4072 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004073 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004074 xmlParserInputState state;
4075 const xmlChar *in;
4076 int nbchar = 0, ccol;
4077
4078 /*
4079 * Check that there is a comment right here.
4080 */
4081 if ((RAW != '<') || (NXT(1) != '!') ||
4082 (NXT(2) != '-') || (NXT(3) != '-')) return;
4083
4084 state = ctxt->instate;
4085 ctxt->instate = XML_PARSER_COMMENT;
4086 SKIP(4);
4087 SHRINK;
4088 GROW;
4089
4090 /*
4091 * Accelerated common case where input don't need to be
4092 * modified before passing it to the handler.
4093 */
4094 in = ctxt->input->cur;
4095 do {
4096 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004097 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004098 ctxt->input->line++; ctxt->input->col = 1;
4099 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004100 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004101 }
4102get_more:
4103 ccol = ctxt->input->col;
4104 while (((*in > '-') && (*in <= 0x7F)) ||
4105 ((*in >= 0x20) && (*in < '-')) ||
4106 (*in == 0x09)) {
4107 in++;
4108 ccol++;
4109 }
4110 ctxt->input->col = ccol;
4111 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004112 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004113 ctxt->input->line++; ctxt->input->col = 1;
4114 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004115 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004116 goto get_more;
4117 }
4118 nbchar = in - ctxt->input->cur;
4119 /*
4120 * save current set of data
4121 */
4122 if (nbchar > 0) {
4123 if ((ctxt->sax != NULL) &&
4124 (ctxt->sax->comment != NULL)) {
4125 if (buf == NULL) {
4126 if ((*in == '-') && (in[1] == '-'))
4127 size = nbchar + 1;
4128 else
4129 size = XML_PARSER_BUFFER_SIZE + nbchar;
4130 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4131 if (buf == NULL) {
4132 xmlErrMemory(ctxt, NULL);
4133 ctxt->instate = state;
4134 return;
4135 }
4136 len = 0;
4137 } else if (len + nbchar + 1 >= size) {
4138 xmlChar *new_buf;
4139 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4140 new_buf = (xmlChar *) xmlRealloc(buf,
4141 size * sizeof(xmlChar));
4142 if (new_buf == NULL) {
4143 xmlFree (buf);
4144 xmlErrMemory(ctxt, NULL);
4145 ctxt->instate = state;
4146 return;
4147 }
4148 buf = new_buf;
4149 }
4150 memcpy(&buf[len], ctxt->input->cur, nbchar);
4151 len += nbchar;
4152 buf[len] = 0;
4153 }
4154 }
4155 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004156 if (*in == 0xA) {
4157 in++;
4158 ctxt->input->line++; ctxt->input->col = 1;
4159 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004160 if (*in == 0xD) {
4161 in++;
4162 if (*in == 0xA) {
4163 ctxt->input->cur = in;
4164 in++;
4165 ctxt->input->line++; ctxt->input->col = 1;
4166 continue; /* while */
4167 }
4168 in--;
4169 }
4170 SHRINK;
4171 GROW;
4172 in = ctxt->input->cur;
4173 if (*in == '-') {
4174 if (in[1] == '-') {
4175 if (in[2] == '>') {
4176 SKIP(3);
4177 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4178 (!ctxt->disableSAX)) {
4179 if (buf != NULL)
4180 ctxt->sax->comment(ctxt->userData, buf);
4181 else
4182 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4183 }
4184 if (buf != NULL)
4185 xmlFree(buf);
4186 ctxt->instate = state;
4187 return;
4188 }
4189 if (buf != NULL)
4190 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4191 "Comment not terminated \n<!--%.50s\n",
4192 buf);
4193 else
4194 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4195 "Comment not terminated \n", NULL);
4196 in++;
4197 ctxt->input->col++;
4198 }
4199 in++;
4200 ctxt->input->col++;
4201 goto get_more;
4202 }
4203 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4204 xmlParseCommentComplex(ctxt, buf, len, size);
4205 ctxt->instate = state;
4206 return;
4207}
4208
Owen Taylor3473f882001-02-23 17:55:21 +00004209
4210/**
4211 * xmlParsePITarget:
4212 * @ctxt: an XML parser context
4213 *
4214 * parse the name of a PI
4215 *
4216 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4217 *
4218 * Returns the PITarget name or NULL
4219 */
4220
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004221const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004222xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004223 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004224
4225 name = xmlParseName(ctxt);
4226 if ((name != NULL) &&
4227 ((name[0] == 'x') || (name[0] == 'X')) &&
4228 ((name[1] == 'm') || (name[1] == 'M')) &&
4229 ((name[2] == 'l') || (name[2] == 'L'))) {
4230 int i;
4231 if ((name[0] == 'x') && (name[1] == 'm') &&
4232 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004233 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004234 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004235 return(name);
4236 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004237 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 return(name);
4239 }
4240 for (i = 0;;i++) {
4241 if (xmlW3CPIs[i] == NULL) break;
4242 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4243 return(name);
4244 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004245 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4246 "xmlParsePITarget: invalid name prefix 'xml'\n",
4247 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
4249 return(name);
4250}
4251
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be made of the catalog pseudo attribute only. A syntax
 * error raises an XML_WAR_CATALOG_PI warning, except when the '=' is
 * missing after "catalog" in which case the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* [S] "catalog" */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;

    /* [S] '=' */
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }

    /* [S] opening quote, either ' or " */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the value runs up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks are allowed after the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4313
Owen Taylor3473f882001-02-23 17:55:21 +00004314/**
4315 * xmlParsePI:
4316 * @ctxt: an XML parser context
4317 *
4318 * parse an XML Processing Instruction.
4319 *
4320 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4321 *
4322 * The processing is transfered to SAX once parsed.
4323 */
4324
4325void
4326xmlParsePI(xmlParserCtxtPtr ctxt) {
4327 xmlChar *buf = NULL;
4328 int len = 0;
4329 int size = XML_PARSER_BUFFER_SIZE;
4330 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004331 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 xmlParserInputState state;
4333 int count = 0;
4334
4335 if ((RAW == '<') && (NXT(1) == '?')) {
4336 xmlParserInputPtr input = ctxt->input;
4337 state = ctxt->instate;
4338 ctxt->instate = XML_PARSER_PI;
4339 /*
4340 * this is a Processing Instruction.
4341 */
4342 SKIP(2);
4343 SHRINK;
4344
4345 /*
4346 * Parse the target name and check for special support like
4347 * namespace.
4348 */
4349 target = xmlParsePITarget(ctxt);
4350 if (target != NULL) {
4351 if ((RAW == '?') && (NXT(1) == '>')) {
4352 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004353 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4354 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004355 }
4356 SKIP(2);
4357
4358 /*
4359 * SAX: PI detected.
4360 */
4361 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->processingInstruction != NULL))
4363 ctxt->sax->processingInstruction(ctxt->userData,
4364 target, NULL);
4365 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 return;
4367 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004368 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004369 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004370 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004371 ctxt->instate = state;
4372 return;
4373 }
4374 cur = CUR;
4375 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004376 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4377 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379 SKIP_BLANKS;
4380 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004381 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004382 ((cur != '?') || (NXT(1) != '>'))) {
4383 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004384 xmlChar *tmp;
4385
Owen Taylor3473f882001-02-23 17:55:21 +00004386 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004387 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4388 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004389 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004390 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004391 ctxt->instate = state;
4392 return;
4393 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004394 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004395 }
4396 count++;
4397 if (count > 50) {
4398 GROW;
4399 count = 0;
4400 }
4401 COPY_BUF(l,buf,len,cur);
4402 NEXTL(l);
4403 cur = CUR_CHAR(l);
4404 if (cur == 0) {
4405 SHRINK;
4406 GROW;
4407 cur = CUR_CHAR(l);
4408 }
4409 }
4410 buf[len] = 0;
4411 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004412 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4413 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004414 } else {
4415 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4417 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004418 }
4419 SKIP(2);
4420
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004421#ifdef LIBXML_CATALOG_ENABLED
4422 if (((state == XML_PARSER_MISC) ||
4423 (state == XML_PARSER_START)) &&
4424 (xmlStrEqual(target, XML_CATALOG_PI))) {
4425 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4426 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4427 (allow == XML_CATA_ALLOW_ALL))
4428 xmlParseCatalogPI(ctxt, buf);
4429 }
4430#endif
4431
4432
Owen Taylor3473f882001-02-23 17:55:21 +00004433 /*
4434 * SAX: PI detected.
4435 */
4436 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4437 (ctxt->sax->processingInstruction != NULL))
4438 ctxt->sax->processingInstruction(ctxt->userData,
4439 target, buf);
4440 }
4441 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004443 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 ctxt->instate = state;
4446 }
4447}
4448
4449/**
4450 * xmlParseNotationDecl:
4451 * @ctxt: an XML parser context
4452 *
4453 * parse a notation declaration
4454 *
4455 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4456 *
4457 * Hence there is actually 3 choices:
4458 * 'PUBLIC' S PubidLiteral
4459 * 'PUBLIC' S PubidLiteral S SystemLiteral
4460 * and 'SYSTEM' S SystemLiteral
4461 *
4462 * See the NOTE on xmlParseExternalID().
4463 */
4464
4465void
4466xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004467 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004468 xmlChar *Pubid;
4469 xmlChar *Systemid;
4470
Daniel Veillarda07050d2003-10-19 14:46:32 +00004471 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004472 xmlParserInputPtr input = ctxt->input;
4473 SHRINK;
4474 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004475 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004476 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4477 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004478 return;
4479 }
4480 SKIP_BLANKS;
4481
Daniel Veillard76d66f42001-05-16 21:05:17 +00004482 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004483 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004484 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004485 return;
4486 }
William M. Brack76e95df2003-10-18 16:20:14 +00004487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004489 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 return;
4491 }
4492 SKIP_BLANKS;
4493
4494 /*
4495 * Parse the IDs.
4496 */
4497 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4498 SKIP_BLANKS;
4499
4500 if (RAW == '>') {
4501 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004502 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4503 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004504 }
4505 NEXT;
4506 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4507 (ctxt->sax->notationDecl != NULL))
4508 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4509 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004510 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
Owen Taylor3473f882001-02-23 17:55:21 +00004512 if (Systemid != NULL) xmlFree(Systemid);
4513 if (Pubid != NULL) xmlFree(Pubid);
4514 }
4515}
4516
4517/**
4518 * xmlParseEntityDecl:
4519 * @ctxt: an XML parser context
4520 *
4521 * parse <!ENTITY declarations
4522 *
4523 * [70] EntityDecl ::= GEDecl | PEDecl
4524 *
4525 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4526 *
4527 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4528 *
4529 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4530 *
4531 * [74] PEDef ::= EntityValue | ExternalID
4532 *
4533 * [76] NDataDecl ::= S 'NDATA' S Name
4534 *
4535 * [ VC: Notation Declared ]
4536 * The Name must match the declared name of a notation.
4537 */
4538
4539void
4540xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004541 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004542 xmlChar *value = NULL;
4543 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004544 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004545 int isParameter = 0;
4546 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004547 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004548
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004550 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004551 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004552 SHRINK;
4553 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004554 skipped = SKIP_BLANKS;
4555 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004556 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4557 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004558 }
Owen Taylor3473f882001-02-23 17:55:21 +00004559
4560 if (RAW == '%') {
4561 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004562 skipped = SKIP_BLANKS;
4563 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004564 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4565 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004566 }
Owen Taylor3473f882001-02-23 17:55:21 +00004567 isParameter = 1;
4568 }
4569
Daniel Veillard76d66f42001-05-16 21:05:17 +00004570 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004571 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004572 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4573 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004574 return;
4575 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004576 skipped = SKIP_BLANKS;
4577 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4579 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004580 }
Owen Taylor3473f882001-02-23 17:55:21 +00004581
Daniel Veillardf5582f12002-06-11 10:08:16 +00004582 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004583 /*
4584 * handle the various case of definitions...
4585 */
4586 if (isParameter) {
4587 if ((RAW == '"') || (RAW == '\'')) {
4588 value = xmlParseEntityValue(ctxt, &orig);
4589 if (value) {
4590 if ((ctxt->sax != NULL) &&
4591 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4592 ctxt->sax->entityDecl(ctxt->userData, name,
4593 XML_INTERNAL_PARAMETER_ENTITY,
4594 NULL, NULL, value);
4595 }
4596 } else {
4597 URI = xmlParseExternalID(ctxt, &literal, 1);
4598 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004600 }
4601 if (URI) {
4602 xmlURIPtr uri;
4603
4604 uri = xmlParseURI((const char *) URI);
4605 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004606 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4607 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004608 /*
4609 * This really ought to be a well formedness error
4610 * but the XML Core WG decided otherwise c.f. issue
4611 * E26 of the XML erratas.
4612 */
Owen Taylor3473f882001-02-23 17:55:21 +00004613 } else {
4614 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004615 /*
4616 * Okay this is foolish to block those but not
4617 * invalid URIs.
4618 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004619 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004620 } else {
4621 if ((ctxt->sax != NULL) &&
4622 (!ctxt->disableSAX) &&
4623 (ctxt->sax->entityDecl != NULL))
4624 ctxt->sax->entityDecl(ctxt->userData, name,
4625 XML_EXTERNAL_PARAMETER_ENTITY,
4626 literal, URI, NULL);
4627 }
4628 xmlFreeURI(uri);
4629 }
4630 }
4631 }
4632 } else {
4633 if ((RAW == '"') || (RAW == '\'')) {
4634 value = xmlParseEntityValue(ctxt, &orig);
4635 if ((ctxt->sax != NULL) &&
4636 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4637 ctxt->sax->entityDecl(ctxt->userData, name,
4638 XML_INTERNAL_GENERAL_ENTITY,
4639 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004640 /*
4641 * For expat compatibility in SAX mode.
4642 */
4643 if ((ctxt->myDoc == NULL) ||
4644 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4645 if (ctxt->myDoc == NULL) {
4646 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4647 }
4648 if (ctxt->myDoc->intSubset == NULL)
4649 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4650 BAD_CAST "fake", NULL, NULL);
4651
Daniel Veillard1af9a412003-08-20 22:54:39 +00004652 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4653 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004654 }
Owen Taylor3473f882001-02-23 17:55:21 +00004655 } else {
4656 URI = xmlParseExternalID(ctxt, &literal, 1);
4657 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004658 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 }
4660 if (URI) {
4661 xmlURIPtr uri;
4662
4663 uri = xmlParseURI((const char *)URI);
4664 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004665 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4666 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004667 /*
4668 * This really ought to be a well formedness error
4669 * but the XML Core WG decided otherwise c.f. issue
4670 * E26 of the XML erratas.
4671 */
Owen Taylor3473f882001-02-23 17:55:21 +00004672 } else {
4673 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004674 /*
4675 * Okay this is foolish to block those but not
4676 * invalid URIs.
4677 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004678 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004679 }
4680 xmlFreeURI(uri);
4681 }
4682 }
William M. Brack76e95df2003-10-18 16:20:14 +00004683 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004684 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4685 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004686 }
4687 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004688 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004689 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004690 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4692 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004693 }
4694 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004695 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004696 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4697 (ctxt->sax->unparsedEntityDecl != NULL))
4698 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4699 literal, URI, ndata);
4700 } else {
4701 if ((ctxt->sax != NULL) &&
4702 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4703 ctxt->sax->entityDecl(ctxt->userData, name,
4704 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4705 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004706 /*
4707 * For expat compatibility in SAX mode.
4708 * assuming the entity replacement was asked for
4709 */
4710 if ((ctxt->replaceEntities != 0) &&
4711 ((ctxt->myDoc == NULL) ||
4712 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4713 if (ctxt->myDoc == NULL) {
4714 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4715 }
4716
4717 if (ctxt->myDoc->intSubset == NULL)
4718 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4719 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004720 xmlSAX2EntityDecl(ctxt, name,
4721 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4722 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004723 }
Owen Taylor3473f882001-02-23 17:55:21 +00004724 }
4725 }
4726 }
4727 SKIP_BLANKS;
4728 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004729 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004730 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004731 } else {
4732 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004733 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4734 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004735 }
4736 NEXT;
4737 }
4738 if (orig != NULL) {
4739 /*
4740 * Ugly mechanism to save the raw entity value.
4741 */
4742 xmlEntityPtr cur = NULL;
4743
4744 if (isParameter) {
4745 if ((ctxt->sax != NULL) &&
4746 (ctxt->sax->getParameterEntity != NULL))
4747 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4748 } else {
4749 if ((ctxt->sax != NULL) &&
4750 (ctxt->sax->getEntity != NULL))
4751 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004752 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004753 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004754 }
Owen Taylor3473f882001-02-23 17:55:21 +00004755 }
4756 if (cur != NULL) {
4757 if (cur->orig != NULL)
4758 xmlFree(orig);
4759 else
4760 cur->orig = orig;
4761 } else
4762 xmlFree(orig);
4763 }
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (value != NULL) xmlFree(value);
4765 if (URI != NULL) xmlFree(URI);
4766 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004767 }
4768}
4769
4770/**
4771 * xmlParseDefaultDecl:
4772 * @ctxt: an XML parser context
4773 * @value: Receive a possible fixed default value for the attribute
4774 *
4775 * Parse an attribute default declaration
4776 *
4777 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4778 *
4779 * [ VC: Required Attribute ]
4780 * if the default declaration is the keyword #REQUIRED, then the
4781 * attribute must be specified for all elements of the type in the
4782 * attribute-list declaration.
4783 *
4784 * [ VC: Attribute Default Legal ]
4785 * The declared default value must meet the lexical constraints of
4786 * the declared attribute type c.f. xmlValidateAttributeDecl()
4787 *
4788 * [ VC: Fixed Attribute Default ]
4789 * if an attribute has a default value declared with the #FIXED
4790 * keyword, instances of that attribute must match the default value.
4791 *
4792 * [ WFC: No < in Attribute Values ]
4793 * handled in xmlParseAttValue()
4794 *
4795 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4796 * or XML_ATTRIBUTE_FIXED.
4797 */
4798
4799int
4800xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4801 int val;
4802 xmlChar *ret;
4803
4804 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004805 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004806 SKIP(9);
4807 return(XML_ATTRIBUTE_REQUIRED);
4808 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004809 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004810 SKIP(8);
4811 return(XML_ATTRIBUTE_IMPLIED);
4812 }
4813 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004814 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004815 SKIP(6);
4816 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004817 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004818 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4819 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004820 }
4821 SKIP_BLANKS;
4822 }
4823 ret = xmlParseAttValue(ctxt);
4824 ctxt->instate = XML_PARSER_DTD;
4825 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004826 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004827 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 } else
4829 *value = ret;
4830 return(val);
4831}
4832
4833/**
4834 * xmlParseNotationType:
4835 * @ctxt: an XML parser context
4836 *
4837 * parse an Notation attribute type.
4838 *
4839 * Note: the leading 'NOTATION' S part has already being parsed...
4840 *
4841 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4842 *
4843 * [ VC: Notation Attributes ]
4844 * Values of this type must match one of the notation names included
4845 * in the declaration; all notation names in the declaration must be declared.
4846 *
4847 * Returns: the notation attribute tree built while parsing
4848 */
4849
4850xmlEnumerationPtr
4851xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004852 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004853 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4854
4855 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004856 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 return(NULL);
4858 }
4859 SHRINK;
4860 do {
4861 NEXT;
4862 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004863 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004864 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004865 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4866 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004867 return(ret);
4868 }
4869 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004870 if (cur == NULL) return(ret);
4871 if (last == NULL) ret = last = cur;
4872 else {
4873 last->next = cur;
4874 last = cur;
4875 }
4876 SKIP_BLANKS;
4877 } while (RAW == '|');
4878 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004879 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004880 if ((last != NULL) && (last != ret))
4881 xmlFreeEnumeration(last);
4882 return(ret);
4883 }
4884 NEXT;
4885 return(ret);
4886}
4887
4888/**
4889 * xmlParseEnumerationType:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse an Enumeration attribute type.
4893 *
4894 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4895 *
4896 * [ VC: Enumeration ]
4897 * Values of this type must match one of the Nmtoken tokens in
4898 * the declaration
4899 *
4900 * Returns: the enumeration attribute tree built while parsing
4901 */
4902
4903xmlEnumerationPtr
4904xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4905 xmlChar *name;
4906 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4907
4908 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004909 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004910 return(NULL);
4911 }
4912 SHRINK;
4913 do {
4914 NEXT;
4915 SKIP_BLANKS;
4916 name = xmlParseNmtoken(ctxt);
4917 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004918 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 return(ret);
4920 }
4921 cur = xmlCreateEnumeration(name);
4922 xmlFree(name);
4923 if (cur == NULL) return(ret);
4924 if (last == NULL) ret = last = cur;
4925 else {
4926 last->next = cur;
4927 last = cur;
4928 }
4929 SKIP_BLANKS;
4930 } while (RAW == '|');
4931 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004932 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004933 return(ret);
4934 }
4935 NEXT;
4936 return(ret);
4937}
4938
4939/**
4940 * xmlParseEnumeratedType:
4941 * @ctxt: an XML parser context
4942 * @tree: the enumeration tree built while parsing
4943 *
4944 * parse an Enumerated attribute type.
4945 *
4946 * [57] EnumeratedType ::= NotationType | Enumeration
4947 *
4948 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4949 *
4950 *
4951 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4952 */
4953
4954int
4955xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004956 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004957 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004958 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4960 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004961 return(0);
4962 }
4963 SKIP_BLANKS;
4964 *tree = xmlParseNotationType(ctxt);
4965 if (*tree == NULL) return(0);
4966 return(XML_ATTRIBUTE_NOTATION);
4967 }
4968 *tree = xmlParseEnumerationType(ctxt);
4969 if (*tree == NULL) return(0);
4970 return(XML_ATTRIBUTE_ENUMERATION);
4971}
4972
4973/**
4974 * xmlParseAttributeType:
4975 * @ctxt: an XML parser context
4976 * @tree: the enumeration tree built while parsing
4977 *
4978 * parse the Attribute list def for an element
4979 *
4980 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4981 *
4982 * [55] StringType ::= 'CDATA'
4983 *
4984 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4985 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4986 *
4987 * Validity constraints for attribute values syntax are checked in
4988 * xmlValidateAttributeValue()
4989 *
4990 * [ VC: ID ]
4991 * Values of type ID must match the Name production. A name must not
4992 * appear more than once in an XML document as a value of this type;
4993 * i.e., ID values must uniquely identify the elements which bear them.
4994 *
4995 * [ VC: One ID per Element Type ]
4996 * No element type may have more than one ID attribute specified.
4997 *
4998 * [ VC: ID Attribute Default ]
4999 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5000 *
5001 * [ VC: IDREF ]
5002 * Values of type IDREF must match the Name production, and values
5003 * of type IDREFS must match Names; each IDREF Name must match the value
5004 * of an ID attribute on some element in the XML document; i.e. IDREF
5005 * values must match the value of some ID attribute.
5006 *
5007 * [ VC: Entity Name ]
5008 * Values of type ENTITY must match the Name production, values
5009 * of type ENTITIES must match Names; each Entity Name must match the
5010 * name of an unparsed entity declared in the DTD.
5011 *
5012 * [ VC: Name Token ]
5013 * Values of type NMTOKEN must match the Nmtoken production; values
5014 * of type NMTOKENS must match Nmtokens.
5015 *
5016 * Returns the attribute type
5017 */
5018int
5019xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5020 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005021 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 SKIP(5);
5023 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005024 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005025 SKIP(6);
5026 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005027 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005028 SKIP(5);
5029 return(XML_ATTRIBUTE_IDREF);
5030 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5031 SKIP(2);
5032 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005033 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005034 SKIP(6);
5035 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005036 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005037 SKIP(8);
5038 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005039 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005040 SKIP(8);
5041 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005042 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005043 SKIP(7);
5044 return(XML_ATTRIBUTE_NMTOKEN);
5045 }
5046 return(xmlParseEnumeratedType(ctxt, tree));
5047}
5048
5049/**
5050 * xmlParseAttributeListDecl:
5051 * @ctxt: an XML parser context
5052 *
5053 * : parse the Attribute list def for an element
5054 *
5055 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5056 *
5057 * [53] AttDef ::= S Name S AttType S DefaultDecl
5058 *
5059 */
5060void
5061xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005062 const xmlChar *elemName;
5063 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 xmlEnumerationPtr tree;
5065
Daniel Veillarda07050d2003-10-19 14:46:32 +00005066 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005067 xmlParserInputPtr input = ctxt->input;
5068
5069 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005070 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005072 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005073 }
5074 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005075 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005077 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5078 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005079 return;
5080 }
5081 SKIP_BLANKS;
5082 GROW;
5083 while (RAW != '>') {
5084 const xmlChar *check = CUR_PTR;
5085 int type;
5086 int def;
5087 xmlChar *defaultValue = NULL;
5088
5089 GROW;
5090 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005091 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005092 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005093 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5094 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005095 break;
5096 }
5097 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005098 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005100 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005101 break;
5102 }
5103 SKIP_BLANKS;
5104
5105 type = xmlParseAttributeType(ctxt, &tree);
5106 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005107 break;
5108 }
5109
5110 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005111 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005112 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5113 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005114 if (tree != NULL)
5115 xmlFreeEnumeration(tree);
5116 break;
5117 }
5118 SKIP_BLANKS;
5119
5120 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5121 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if (defaultValue != NULL)
5123 xmlFree(defaultValue);
5124 if (tree != NULL)
5125 xmlFreeEnumeration(tree);
5126 break;
5127 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005128 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5129 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005130
5131 GROW;
5132 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005133 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005135 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005136 if (defaultValue != NULL)
5137 xmlFree(defaultValue);
5138 if (tree != NULL)
5139 xmlFreeEnumeration(tree);
5140 break;
5141 }
5142 SKIP_BLANKS;
5143 }
5144 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005145 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5146 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005147 if (defaultValue != NULL)
5148 xmlFree(defaultValue);
5149 if (tree != NULL)
5150 xmlFreeEnumeration(tree);
5151 break;
5152 }
5153 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5154 (ctxt->sax->attributeDecl != NULL))
5155 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5156 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005157 else if (tree != NULL)
5158 xmlFreeEnumeration(tree);
5159
5160 if ((ctxt->sax2) && (defaultValue != NULL) &&
5161 (def != XML_ATTRIBUTE_IMPLIED) &&
5162 (def != XML_ATTRIBUTE_REQUIRED)) {
5163 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5164 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005165 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005166 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5167 }
Owen Taylor3473f882001-02-23 17:55:21 +00005168 if (defaultValue != NULL)
5169 xmlFree(defaultValue);
5170 GROW;
5171 }
5172 if (RAW == '>') {
5173 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5175 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005176 }
5177 NEXT;
5178 }
Owen Taylor3473f882001-02-23 17:55:21 +00005179 }
5180}
5181
5182/**
5183 * xmlParseElementMixedContentDecl:
5184 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005185 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005186 *
5187 * parse the declaration for a Mixed Element content
5188 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5189 *
5190 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5191 * '(' S? '#PCDATA' S? ')'
5192 *
5193 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5194 *
5195 * [ VC: No Duplicate Types ]
5196 * The same name must not appear more than once in a single
5197 * mixed-content declaration.
5198 *
5199 * returns: the list of the xmlElementContentPtr describing the element choices
5200 */
5201xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005202xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005203 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005204 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005205
5206 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005207 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005208 SKIP(7);
5209 SKIP_BLANKS;
5210 SHRINK;
5211 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005212 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005213 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5214"Element content declaration doesn't start and stop in the same entity\n",
5215 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005216 }
Owen Taylor3473f882001-02-23 17:55:21 +00005217 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005218 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 if (RAW == '*') {
5220 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5221 NEXT;
5222 }
5223 return(ret);
5224 }
5225 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005226 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 if (ret == NULL) return(NULL);
5228 }
5229 while (RAW == '|') {
5230 NEXT;
5231 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005232 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005233 if (ret == NULL) return(NULL);
5234 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005235 if (cur != NULL)
5236 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005237 cur = ret;
5238 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005239 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005241 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005242 if (n->c1 != NULL)
5243 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005244 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005245 if (n != NULL)
5246 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005247 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005248 }
5249 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005250 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005251 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005252 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005253 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 return(NULL);
5256 }
5257 SKIP_BLANKS;
5258 GROW;
5259 }
5260 if ((RAW == ')') && (NXT(1) == '*')) {
5261 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005262 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005263 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005264 if (cur->c2 != NULL)
5265 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 }
5267 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005268 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005269 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5270"Element content declaration doesn't start and stop in the same entity\n",
5271 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273 SKIP(2);
5274 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005275 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005276 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005277 return(NULL);
5278 }
5279
5280 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005281 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
5283 return(ret);
5284}
5285
5286/**
5287 * xmlParseElementChildrenContentDecl:
5288 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005289 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005290 *
5291 * parse the declaration for an Element children content model
5292 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5293 *
5294 *
5295 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5296 *
5297 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5298 *
5299 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5300 *
5301 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5302 *
5303 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5304 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005305 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005306 * opening or closing parentheses in a choice, seq, or Mixed
5307 * construct is contained in the replacement text for a parameter
5308 * entity, both must be contained in the same replacement text. For
5309 * interoperability, if a parameter-entity reference appears in a
5310 * choice, seq, or Mixed construct, its replacement text should not
5311 * be empty, and neither the first nor last non-blank character of
5312 * the replacement text should be a connector (| or ,).
5313 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005314 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005315 * hierarchy.
5316 */
5317xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005318xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005319 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005320 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 xmlChar type = 0;
5322
5323 SKIP_BLANKS;
5324 GROW;
5325 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005326 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005327
Owen Taylor3473f882001-02-23 17:55:21 +00005328 /* Recurse on first child */
5329 NEXT;
5330 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005331 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005332 SKIP_BLANKS;
5333 GROW;
5334 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005335 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005337 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005338 return(NULL);
5339 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005340 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005341 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005342 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005343 return(NULL);
5344 }
Owen Taylor3473f882001-02-23 17:55:21 +00005345 GROW;
5346 if (RAW == '?') {
5347 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5348 NEXT;
5349 } else if (RAW == '*') {
5350 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5351 NEXT;
5352 } else if (RAW == '+') {
5353 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5354 NEXT;
5355 } else {
5356 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5357 }
Owen Taylor3473f882001-02-23 17:55:21 +00005358 GROW;
5359 }
5360 SKIP_BLANKS;
5361 SHRINK;
5362 while (RAW != ')') {
5363 /*
5364 * Each loop we parse one separator and one element.
5365 */
5366 if (RAW == ',') {
5367 if (type == 0) type = CUR;
5368
5369 /*
5370 * Detect "Name | Name , Name" error
5371 */
5372 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005373 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005374 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005375 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005376 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005377 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005378 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005379 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 return(NULL);
5381 }
5382 NEXT;
5383
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005384 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005385 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005386 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005387 xmlFreeDocElementContent(ctxt->myDoc, last);
5388 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005389 return(NULL);
5390 }
5391 if (last == NULL) {
5392 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005393 if (ret != NULL)
5394 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005395 ret = cur = op;
5396 } else {
5397 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005398 if (op != NULL)
5399 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005401 if (last != NULL)
5402 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005403 cur =op;
5404 last = NULL;
5405 }
5406 } else if (RAW == '|') {
5407 if (type == 0) type = CUR;
5408
5409 /*
5410 * Detect "Name , Name | Name" error
5411 */
5412 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005413 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005414 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005415 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005416 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005417 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005419 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 return(NULL);
5421 }
5422 NEXT;
5423
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005424 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005425 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005426 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005427 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005429 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005430 return(NULL);
5431 }
5432 if (last == NULL) {
5433 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005434 if (ret != NULL)
5435 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 ret = cur = op;
5437 } else {
5438 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005439 if (op != NULL)
5440 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005441 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005442 if (last != NULL)
5443 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005444 cur =op;
5445 last = NULL;
5446 }
5447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005448 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005449 if ((last != NULL) && (last != ret))
5450 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005452 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 return(NULL);
5454 }
5455 GROW;
5456 SKIP_BLANKS;
5457 GROW;
5458 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005459 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005460 /* Recurse on second child */
5461 NEXT;
5462 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005463 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 SKIP_BLANKS;
5465 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005466 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005467 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005468 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005470 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005471 return(NULL);
5472 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005473 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005474 if (RAW == '?') {
5475 last->ocur = XML_ELEMENT_CONTENT_OPT;
5476 NEXT;
5477 } else if (RAW == '*') {
5478 last->ocur = XML_ELEMENT_CONTENT_MULT;
5479 NEXT;
5480 } else if (RAW == '+') {
5481 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5482 NEXT;
5483 } else {
5484 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5485 }
5486 }
5487 SKIP_BLANKS;
5488 GROW;
5489 }
5490 if ((cur != NULL) && (last != NULL)) {
5491 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005492 if (last != NULL)
5493 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005494 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005495 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005496 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5497"Element content declaration doesn't start and stop in the same entity\n",
5498 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005499 }
Owen Taylor3473f882001-02-23 17:55:21 +00005500 NEXT;
5501 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005502 if (ret != NULL) {
5503 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5504 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5505 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5506 else
5507 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5508 }
Owen Taylor3473f882001-02-23 17:55:21 +00005509 NEXT;
5510 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005511 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005512 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005513 cur = ret;
5514 /*
5515 * Some normalization:
5516 * (a | b* | c?)* == (a | b | c)*
5517 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005518 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005519 if ((cur->c1 != NULL) &&
5520 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5521 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5522 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5523 if ((cur->c2 != NULL) &&
5524 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5525 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5526 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5527 cur = cur->c2;
5528 }
5529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 NEXT;
5531 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005532 if (ret != NULL) {
5533 int found = 0;
5534
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005535 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5536 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5537 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005538 else
5539 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005540 /*
5541 * Some normalization:
5542 * (a | b*)+ == (a | b)*
5543 * (a | b?)+ == (a | b)*
5544 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005545 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005546 if ((cur->c1 != NULL) &&
5547 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5548 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5549 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5550 found = 1;
5551 }
5552 if ((cur->c2 != NULL) &&
5553 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5554 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5555 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5556 found = 1;
5557 }
5558 cur = cur->c2;
5559 }
5560 if (found)
5561 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5562 }
Owen Taylor3473f882001-02-23 17:55:21 +00005563 NEXT;
5564 }
5565 return(ret);
5566}
5567
5568/**
5569 * xmlParseElementContentDecl:
5570 * @ctxt: an XML parser context
5571 * @name: the name of the element being defined.
5572 * @result: the Element Content pointer will be stored here if any
5573 *
5574 * parse the declaration for an Element content either Mixed or Children,
5575 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5576 *
5577 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5578 *
5579 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5580 */
5581
5582int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005583xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005584 xmlElementContentPtr *result) {
5585
5586 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005588 int res;
5589
5590 *result = NULL;
5591
5592 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005593 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005594 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005595 return(-1);
5596 }
5597 NEXT;
5598 GROW;
5599 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005600 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005601 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005602 res = XML_ELEMENT_TYPE_MIXED;
5603 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005604 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005605 res = XML_ELEMENT_TYPE_ELEMENT;
5606 }
Owen Taylor3473f882001-02-23 17:55:21 +00005607 SKIP_BLANKS;
5608 *result = tree;
5609 return(res);
5610}
5611
5612/**
5613 * xmlParseElementDecl:
5614 * @ctxt: an XML parser context
5615 *
5616 * parse an Element declaration.
5617 *
5618 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5619 *
5620 * [ VC: Unique Element Type Declaration ]
5621 * No element type may be declared more than once
5622 *
5623 * Returns the type of the element, or -1 in case of error
5624 */
5625int
5626xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005627 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005628 int ret = -1;
5629 xmlElementContentPtr content = NULL;
5630
Daniel Veillard4c778d82005-01-23 17:37:44 +00005631 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005632 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005633 xmlParserInputPtr input = ctxt->input;
5634
5635 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005636 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5638 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005639 }
5640 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005641 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005642 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005643 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5644 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005645 return(-1);
5646 }
5647 while ((RAW == 0) && (ctxt->inputNr > 1))
5648 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005649 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005650 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5651 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005652 }
5653 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005654 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005655 SKIP(5);
5656 /*
5657 * Element must always be empty.
5658 */
5659 ret = XML_ELEMENT_TYPE_EMPTY;
5660 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5661 (NXT(2) == 'Y')) {
5662 SKIP(3);
5663 /*
5664 * Element is a generic container.
5665 */
5666 ret = XML_ELEMENT_TYPE_ANY;
5667 } else if (RAW == '(') {
5668 ret = xmlParseElementContentDecl(ctxt, name, &content);
5669 } else {
5670 /*
5671 * [ WFC: PEs in Internal Subset ] error handling.
5672 */
5673 if ((RAW == '%') && (ctxt->external == 0) &&
5674 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005675 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005676 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005677 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005678 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005679 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5680 }
Owen Taylor3473f882001-02-23 17:55:21 +00005681 return(-1);
5682 }
5683
5684 SKIP_BLANKS;
5685 /*
5686 * Pop-up of finished entities.
5687 */
5688 while ((RAW == 0) && (ctxt->inputNr > 1))
5689 xmlPopInput(ctxt);
5690 SKIP_BLANKS;
5691
5692 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005693 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005694 if (content != NULL) {
5695 xmlFreeDocElementContent(ctxt->myDoc, content);
5696 }
Owen Taylor3473f882001-02-23 17:55:21 +00005697 } else {
5698 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005699 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5700 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005701 }
5702
5703 NEXT;
5704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005705 (ctxt->sax->elementDecl != NULL)) {
5706 if (content != NULL)
5707 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005708 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5709 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005710 if ((content != NULL) && (content->parent == NULL)) {
5711 /*
5712 * this is a trick: if xmlAddElementDecl is called,
5713 * instead of copying the full tree it is plugged directly
5714 * if called from the parser. Avoid duplicating the
5715 * interfaces or change the API/ABI
5716 */
5717 xmlFreeDocElementContent(ctxt->myDoc, content);
5718 }
5719 } else if (content != NULL) {
5720 xmlFreeDocElementContent(ctxt->myDoc, content);
5721 }
Owen Taylor3473f882001-02-23 17:55:21 +00005722 }
Owen Taylor3473f882001-02-23 17:55:21 +00005723 }
5724 return(ret);
5725}
5726
5727/**
Owen Taylor3473f882001-02-23 17:55:21 +00005728 * xmlParseConditionalSections
5729 * @ctxt: an XML parser context
5730 *
5731 * [61] conditionalSect ::= includeSect | ignoreSect
5732 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5733 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5734 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5735 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5736 */
5737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005738static void
Owen Taylor3473f882001-02-23 17:55:21 +00005739xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5740 SKIP(3);
5741 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005742 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005743 SKIP(7);
5744 SKIP_BLANKS;
5745 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 } else {
5748 NEXT;
5749 }
5750 if (xmlParserDebugEntities) {
5751 if ((ctxt->input != NULL) && (ctxt->input->filename))
5752 xmlGenericError(xmlGenericErrorContext,
5753 "%s(%d): ", ctxt->input->filename,
5754 ctxt->input->line);
5755 xmlGenericError(xmlGenericErrorContext,
5756 "Entering INCLUDE Conditional Section\n");
5757 }
5758
5759 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5760 (NXT(2) != '>'))) {
5761 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005762 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005763
5764 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5765 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005766 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005767 NEXT;
5768 } else if (RAW == '%') {
5769 xmlParsePEReference(ctxt);
5770 } else
5771 xmlParseMarkupDecl(ctxt);
5772
5773 /*
5774 * Pop-up of finished entities.
5775 */
5776 while ((RAW == 0) && (ctxt->inputNr > 1))
5777 xmlPopInput(ctxt);
5778
Daniel Veillardfdc91562002-07-01 21:52:03 +00005779 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005780 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005781 break;
5782 }
5783 }
5784 if (xmlParserDebugEntities) {
5785 if ((ctxt->input != NULL) && (ctxt->input->filename))
5786 xmlGenericError(xmlGenericErrorContext,
5787 "%s(%d): ", ctxt->input->filename,
5788 ctxt->input->line);
5789 xmlGenericError(xmlGenericErrorContext,
5790 "Leaving INCLUDE Conditional Section\n");
5791 }
5792
Daniel Veillarda07050d2003-10-19 14:46:32 +00005793 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005794 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005795 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005796 int depth = 0;
5797
5798 SKIP(6);
5799 SKIP_BLANKS;
5800 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005801 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005802 } else {
5803 NEXT;
5804 }
5805 if (xmlParserDebugEntities) {
5806 if ((ctxt->input != NULL) && (ctxt->input->filename))
5807 xmlGenericError(xmlGenericErrorContext,
5808 "%s(%d): ", ctxt->input->filename,
5809 ctxt->input->line);
5810 xmlGenericError(xmlGenericErrorContext,
5811 "Entering IGNORE Conditional Section\n");
5812 }
5813
5814 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005815 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005816 * But disable SAX event generating DTD building in the meantime
5817 */
5818 state = ctxt->disableSAX;
5819 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 ctxt->instate = XML_PARSER_IGNORE;
5822
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005823 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005824 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5825 depth++;
5826 SKIP(3);
5827 continue;
5828 }
5829 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5830 if (--depth >= 0) SKIP(3);
5831 continue;
5832 }
5833 NEXT;
5834 continue;
5835 }
5836
5837 ctxt->disableSAX = state;
5838 ctxt->instate = instate;
5839
5840 if (xmlParserDebugEntities) {
5841 if ((ctxt->input != NULL) && (ctxt->input->filename))
5842 xmlGenericError(xmlGenericErrorContext,
5843 "%s(%d): ", ctxt->input->filename,
5844 ctxt->input->line);
5845 xmlGenericError(xmlGenericErrorContext,
5846 "Leaving IGNORE Conditional Section\n");
5847 }
5848
5849 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005850 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005851 }
5852
5853 if (RAW == 0)
5854 SHRINK;
5855
5856 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005857 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 } else {
5859 SKIP(3);
5860 }
5861}
5862
5863/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005864 * xmlParseMarkupDecl:
5865 * @ctxt: an XML parser context
5866 *
5867 * parse Markup declarations
5868 *
5869 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5870 * NotationDecl | PI | Comment
5871 *
5872 * [ VC: Proper Declaration/PE Nesting ]
5873 * Parameter-entity replacement text must be properly nested with
5874 * markup declarations. That is to say, if either the first character
5875 * or the last character of a markup declaration (markupdecl above) is
5876 * contained in the replacement text for a parameter-entity reference,
5877 * both must be contained in the same replacement text.
5878 *
5879 * [ WFC: PEs in Internal Subset ]
5880 * In the internal DTD subset, parameter-entity references can occur
5881 * only where markup declarations can occur, not within markup declarations.
5882 * (This does not apply to references that occur in external parameter
5883 * entities or to the external subset.)
5884 */
5885void
5886xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5887 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005888 if (CUR == '<') {
5889 if (NXT(1) == '!') {
5890 switch (NXT(2)) {
5891 case 'E':
5892 if (NXT(3) == 'L')
5893 xmlParseElementDecl(ctxt);
5894 else if (NXT(3) == 'N')
5895 xmlParseEntityDecl(ctxt);
5896 break;
5897 case 'A':
5898 xmlParseAttributeListDecl(ctxt);
5899 break;
5900 case 'N':
5901 xmlParseNotationDecl(ctxt);
5902 break;
5903 case '-':
5904 xmlParseComment(ctxt);
5905 break;
5906 default:
5907 /* there is an error but it will be detected later */
5908 break;
5909 }
5910 } else if (NXT(1) == '?') {
5911 xmlParsePI(ctxt);
5912 }
5913 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005914 /*
5915 * This is only for internal subset. On external entities,
5916 * the replacement is done before parsing stage
5917 */
5918 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5919 xmlParsePEReference(ctxt);
5920
5921 /*
5922 * Conditional sections are allowed from entities included
5923 * by PE References in the internal subset.
5924 */
5925 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5926 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5927 xmlParseConditionalSections(ctxt);
5928 }
5929 }
5930
5931 ctxt->instate = XML_PARSER_DTD;
5932}
5933
5934/**
5935 * xmlParseTextDecl:
5936 * @ctxt: an XML parser context
5937 *
5938 * parse an XML declaration header for external entities
5939 *
5940 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5941 *
5942 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5943 */
5944
5945void
5946xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5947 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005948 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005949
5950 /*
5951 * We know that '<?xml' is here.
5952 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005953 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005954 SKIP(5);
5955 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005956 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005957 return;
5958 }
5959
William M. Brack76e95df2003-10-18 16:20:14 +00005960 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005961 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5962 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005963 }
5964 SKIP_BLANKS;
5965
5966 /*
5967 * We may have the VersionInfo here.
5968 */
5969 version = xmlParseVersionInfo(ctxt);
5970 if (version == NULL)
5971 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005972 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005973 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5975 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005976 }
5977 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005978 ctxt->input->version = version;
5979
5980 /*
5981 * We must have the encoding declaration
5982 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005983 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005984 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5985 /*
5986 * The XML REC instructs us to stop parsing right here
5987 */
5988 return;
5989 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005990 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5991 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5992 "Missing encoding in text declaration\n");
5993 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005994
5995 SKIP_BLANKS;
5996 if ((RAW == '?') && (NXT(1) == '>')) {
5997 SKIP(2);
5998 } else if (RAW == '>') {
5999 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006000 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006001 NEXT;
6002 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006003 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006004 MOVETO_ENDTAG(CUR_PTR);
6005 NEXT;
6006 }
6007}
6008
6009/**
Owen Taylor3473f882001-02-23 17:55:21 +00006010 * xmlParseExternalSubset:
6011 * @ctxt: an XML parser context
6012 * @ExternalID: the external identifier
6013 * @SystemID: the system identifier (or URL)
6014 *
6015 * parse Markup declarations from an external subset
6016 *
6017 * [30] extSubset ::= textDecl? extSubsetDecl
6018 *
6019 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6020 */
6021void
6022xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6023 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006024 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006025 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006026 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006027 xmlParseTextDecl(ctxt);
6028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6029 /*
6030 * The XML REC instructs us to stop parsing right here
6031 */
6032 ctxt->instate = XML_PARSER_EOF;
6033 return;
6034 }
6035 }
6036 if (ctxt->myDoc == NULL) {
6037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6038 }
6039 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6040 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6041
6042 ctxt->instate = XML_PARSER_DTD;
6043 ctxt->external = 1;
6044 while (((RAW == '<') && (NXT(1) == '?')) ||
6045 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006046 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006047 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006048 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006049
6050 GROW;
6051 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6052 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006053 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006054 NEXT;
6055 } else if (RAW == '%') {
6056 xmlParsePEReference(ctxt);
6057 } else
6058 xmlParseMarkupDecl(ctxt);
6059
6060 /*
6061 * Pop-up of finished entities.
6062 */
6063 while ((RAW == 0) && (ctxt->inputNr > 1))
6064 xmlPopInput(ctxt);
6065
Daniel Veillardfdc91562002-07-01 21:52:03 +00006066 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006067 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 break;
6069 }
6070 }
6071
6072 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006073 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006074 }
6075
6076}
6077
6078/**
6079 * xmlParseReference:
6080 * @ctxt: an XML parser context
6081 *
6082 * parse and handle entity references in content, depending on the SAX
6083 * interface, this may end-up in a call to character() if this is a
6084 * CharRef, a predefined entity, if there is no reference() callback.
6085 * or if the parser was asked to switch to that mode.
6086 *
6087 * [67] Reference ::= EntityRef | CharRef
6088 */
6089void
6090xmlParseReference(xmlParserCtxtPtr ctxt) {
6091 xmlEntityPtr ent;
6092 xmlChar *val;
6093 if (RAW != '&') return;
6094
6095 if (NXT(1) == '#') {
6096 int i = 0;
6097 xmlChar out[10];
6098 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006099 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006100
6101 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6102 /*
6103 * So we are using non-UTF-8 buffers
6104 * Check that the char fit on 8bits, if not
6105 * generate a CharRef.
6106 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006107 if (value <= 0xFF) {
6108 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006109 out[1] = 0;
6110 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6111 (!ctxt->disableSAX))
6112 ctxt->sax->characters(ctxt->userData, out, 1);
6113 } else {
6114 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006115 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006116 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006117 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6119 (!ctxt->disableSAX))
6120 ctxt->sax->reference(ctxt->userData, out);
6121 }
6122 } else {
6123 /*
6124 * Just encode the value in UTF-8
6125 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006126 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006127 out[i] = 0;
6128 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6129 (!ctxt->disableSAX))
6130 ctxt->sax->characters(ctxt->userData, out, i);
6131 }
6132 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006133 int was_checked;
6134
Owen Taylor3473f882001-02-23 17:55:21 +00006135 ent = xmlParseEntityRef(ctxt);
6136 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006137 if (!ctxt->wellFormed)
6138 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006139 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006140 if ((ent->name != NULL) &&
6141 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6142 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006143 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006144
6145
6146 /*
6147 * The first reference to the entity trigger a parsing phase
6148 * where the ent->children is filled with the result from
6149 * the parsing.
6150 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006151 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006152 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006153
Owen Taylor3473f882001-02-23 17:55:21 +00006154 value = ent->content;
6155
6156 /*
6157 * Check that this entity is well formed
6158 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006159 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006160 (value[1] == 0) && (value[0] == '<') &&
6161 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6162 /*
6163 * DONE: get definite answer on this !!!
6164 * Lots of entity decls are used to declare a single
6165 * char
6166 * <!ENTITY lt "<">
6167 * Which seems to be valid since
6168 * 2.4: The ampersand character (&) and the left angle
6169 * bracket (<) may appear in their literal form only
6170 * when used ... They are also legal within the literal
6171 * entity value of an internal entity declaration;i
6172 * see "4.3.2 Well-Formed Parsed Entities".
6173 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6174 * Looking at the OASIS test suite and James Clark
6175 * tests, this is broken. However the XML REC uses
6176 * it. Is the XML REC not well-formed ????
6177 * This is a hack to avoid this problem
6178 *
6179 * ANSWER: since lt gt amp .. are already defined,
6180 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006181 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006182 * is lousy but acceptable.
6183 */
6184 list = xmlNewDocText(ctxt->myDoc, value);
6185 if (list != NULL) {
6186 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6187 (ent->children == NULL)) {
6188 ent->children = list;
6189 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006190 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006191 list->parent = (xmlNodePtr) ent;
6192 } else {
6193 xmlFreeNodeList(list);
6194 }
6195 } else if (list != NULL) {
6196 xmlFreeNodeList(list);
6197 }
6198 } else {
6199 /*
6200 * 4.3.2: An internal general parsed entity is well-formed
6201 * if its replacement text matches the production labeled
6202 * content.
6203 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006204
6205 void *user_data;
6206 /*
6207 * This is a bit hackish but this seems the best
6208 * way to make sure both SAX and DOM entity support
6209 * behaves okay.
6210 */
6211 if (ctxt->userData == ctxt)
6212 user_data = NULL;
6213 else
6214 user_data = ctxt->userData;
6215
Owen Taylor3473f882001-02-23 17:55:21 +00006216 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6217 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006218 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6219 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006220 ctxt->depth--;
6221 } else if (ent->etype ==
6222 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6223 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006224 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006225 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006226 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006227 ctxt->depth--;
6228 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006229 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006230 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6231 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006232 }
6233 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006234 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006235 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006236 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006237 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6238 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006239 (ent->children == NULL)) {
6240 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006241 if (ctxt->replaceEntities) {
6242 /*
6243 * Prune it directly in the generated document
6244 * except for single text nodes.
6245 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006246 if (((list->type == XML_TEXT_NODE) &&
6247 (list->next == NULL)) ||
6248 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006249 list->parent = (xmlNodePtr) ent;
6250 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006251 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006252 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006253 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006254 while (list != NULL) {
6255 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006256 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006257 if (list->next == NULL)
6258 ent->last = list;
6259 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006260 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006261 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006262#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6264 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006265#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006266 }
6267 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006268 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006269 while (list != NULL) {
6270 list->parent = (xmlNodePtr) ent;
6271 if (list->next == NULL)
6272 ent->last = list;
6273 list = list->next;
6274 }
Owen Taylor3473f882001-02-23 17:55:21 +00006275 }
6276 } else {
6277 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006278 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006279 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006280 } else if ((ret != XML_ERR_OK) &&
6281 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006282 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6283 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006284 } else if (list != NULL) {
6285 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006286 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006287 }
6288 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006289 ent->checked = 1;
6290 }
6291
6292 if (ent->children == NULL) {
6293 /*
6294 * Probably running in SAX mode and the callbacks don't
6295 * build the entity content. So unless we already went
6296 * though parsing for first checking go though the entity
6297 * content to generate callbacks associated to the entity
6298 */
6299 if (was_checked == 1) {
6300 void *user_data;
6301 /*
6302 * This is a bit hackish but this seems the best
6303 * way to make sure both SAX and DOM entity support
6304 * behaves okay.
6305 */
6306 if (ctxt->userData == ctxt)
6307 user_data = NULL;
6308 else
6309 user_data = ctxt->userData;
6310
6311 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6312 ctxt->depth++;
6313 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6314 ent->content, user_data, NULL);
6315 ctxt->depth--;
6316 } else if (ent->etype ==
6317 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6318 ctxt->depth++;
6319 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6320 ctxt->sax, user_data, ctxt->depth,
6321 ent->URI, ent->ExternalID, NULL);
6322 ctxt->depth--;
6323 } else {
6324 ret = XML_ERR_ENTITY_PE_INTERNAL;
6325 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6326 "invalid entity type found\n", NULL);
6327 }
6328 if (ret == XML_ERR_ENTITY_LOOP) {
6329 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6330 return;
6331 }
6332 }
6333 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6334 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6335 /*
6336 * Entity reference callback comes second, it's somewhat
6337 * superfluous but a compatibility to historical behaviour
6338 */
6339 ctxt->sax->reference(ctxt->userData, ent->name);
6340 }
6341 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006342 }
6343 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006344 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006345 /*
6346 * Create a node.
6347 */
6348 ctxt->sax->reference(ctxt->userData, ent->name);
6349 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006350 }
6351 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006352 /*
6353 * There is a problem on the handling of _private for entities
6354 * (bug 155816): Should we copy the content of the field from
6355 * the entity (possibly overwriting some value set by the user
6356 * when a copy is created), should we leave it alone, or should
6357 * we try to take care of different situations? The problem
6358 * is exacerbated by the usage of this field by the xmlReader.
6359 * To fix this bug, we look at _private on the created node
6360 * and, if it's NULL, we copy in whatever was in the entity.
6361 * If it's not NULL we leave it alone. This is somewhat of a
6362 * hack - maybe we should have further tests to determine
6363 * what to do.
6364 */
Owen Taylor3473f882001-02-23 17:55:21 +00006365 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6366 /*
6367 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006368 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006369 * In the first occurrence list contains the replacement.
6370 * progressive == 2 means we are operating on the Reader
6371 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006372 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006373 if (((list == NULL) && (ent->owner == 0)) ||
6374 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006375 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006376
6377 /*
6378 * when operating on a reader, the entities definitions
6379 * are always owning the entities subtree.
6380 if (ctxt->parseMode == XML_PARSE_READER)
6381 ent->owner = 1;
6382 */
6383
Daniel Veillard62f313b2001-07-04 19:49:14 +00006384 cur = ent->children;
6385 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006386 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006387 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006388 if (nw->_private == NULL)
6389 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006390 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006391 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006392 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006393 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006394 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006395 if (cur == ent->last) {
6396 /*
6397 * needed to detect some strange empty
6398 * node cases in the reader tests
6399 */
6400 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006401 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006402 (nw->type == XML_ELEMENT_NODE) &&
6403 (nw->children == NULL))
6404 nw->extra = 1;
6405
Daniel Veillard62f313b2001-07-04 19:49:14 +00006406 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006407 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006408 cur = cur->next;
6409 }
Daniel Veillard81273902003-09-30 00:43:48 +00006410#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006411 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006412 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006413#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006414 } else if (list == NULL) {
6415 xmlNodePtr nw = NULL, cur, next, last,
6416 firstChild = NULL;
6417 /*
6418 * Copy the entity child list and make it the new
6419 * entity child list. The goal is to make sure any
6420 * ID or REF referenced will be the one from the
6421 * document content and not the entity copy.
6422 */
6423 cur = ent->children;
6424 ent->children = NULL;
6425 last = ent->last;
6426 ent->last = NULL;
6427 while (cur != NULL) {
6428 next = cur->next;
6429 cur->next = NULL;
6430 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006431 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006432 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006433 if (nw->_private == NULL)
6434 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006435 if (firstChild == NULL){
6436 firstChild = cur;
6437 }
6438 xmlAddChild((xmlNodePtr) ent, nw);
6439 xmlAddChild(ctxt->node, cur);
6440 }
6441 if (cur == last)
6442 break;
6443 cur = next;
6444 }
6445 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006446#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006447 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6448 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006449#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006450 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006451 const xmlChar *nbktext;
6452
Daniel Veillard62f313b2001-07-04 19:49:14 +00006453 /*
6454 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006455 * node with a possible previous text one which
6456 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006457 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006458 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6459 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006460 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006461 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006462 if ((ent->last != ent->children) &&
6463 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006464 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006465 xmlAddChildList(ctxt->node, ent->children);
6466 }
6467
Owen Taylor3473f882001-02-23 17:55:21 +00006468 /*
6469 * This is to avoid a nasty side effect, see
6470 * characters() in SAX.c
6471 */
6472 ctxt->nodemem = 0;
6473 ctxt->nodelen = 0;
6474 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006475 }
6476 }
6477 } else {
6478 val = ent->content;
6479 if (val == NULL) return;
6480 /*
6481 * inline the entity.
6482 */
6483 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6484 (!ctxt->disableSAX))
6485 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6486 }
6487 }
6488}
6489
6490/**
6491 * xmlParseEntityRef:
6492 * @ctxt: an XML parser context
6493 *
6494 * parse ENTITY references declarations
6495 *
6496 * [68] EntityRef ::= '&' Name ';'
6497 *
6498 * [ WFC: Entity Declared ]
6499 * In a document without any DTD, a document with only an internal DTD
6500 * subset which contains no parameter entity references, or a document
6501 * with "standalone='yes'", the Name given in the entity reference
6502 * must match that in an entity declaration, except that well-formed
6503 * documents need not declare any of the following entities: amp, lt,
6504 * gt, apos, quot. The declaration of a parameter entity must precede
6505 * any reference to it. Similarly, the declaration of a general entity
6506 * must precede any reference to it which appears in a default value in an
6507 * attribute-list declaration. Note that if entities are declared in the
6508 * external subset or in external parameter entities, a non-validating
6509 * processor is not obligated to read and process their declarations;
6510 * for such documents, the rule that an entity must be declared is a
6511 * well-formedness constraint only if standalone='yes'.
6512 *
6513 * [ WFC: Parsed Entity ]
6514 * An entity reference must not contain the name of an unparsed entity
6515 *
6516 * Returns the xmlEntityPtr if found, or NULL otherwise.
6517 */
6518xmlEntityPtr
6519xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006520 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006521 xmlEntityPtr ent = NULL;
6522
6523 GROW;
6524
6525 if (RAW == '&') {
6526 NEXT;
6527 name = xmlParseName(ctxt);
6528 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006529 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6530 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006531 } else {
6532 if (RAW == ';') {
6533 NEXT;
6534 /*
6535 * Ask first SAX for entity resolution, otherwise try the
6536 * predefined set.
6537 */
6538 if (ctxt->sax != NULL) {
6539 if (ctxt->sax->getEntity != NULL)
6540 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006541 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006542 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006543 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6544 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006545 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006546 }
Owen Taylor3473f882001-02-23 17:55:21 +00006547 }
6548 /*
6549 * [ WFC: Entity Declared ]
6550 * In a document without any DTD, a document with only an
6551 * internal DTD subset which contains no parameter entity
6552 * references, or a document with "standalone='yes'", the
6553 * Name given in the entity reference must match that in an
6554 * entity declaration, except that well-formed documents
6555 * need not declare any of the following entities: amp, lt,
6556 * gt, apos, quot.
6557 * The declaration of a parameter entity must precede any
6558 * reference to it.
6559 * Similarly, the declaration of a general entity must
6560 * precede any reference to it which appears in a default
6561 * value in an attribute-list declaration. Note that if
6562 * entities are declared in the external subset or in
6563 * external parameter entities, a non-validating processor
6564 * is not obligated to read and process their declarations;
6565 * for such documents, the rule that an entity must be
6566 * declared is a well-formedness constraint only if
6567 * standalone='yes'.
6568 */
6569 if (ent == NULL) {
6570 if ((ctxt->standalone == 1) ||
6571 ((ctxt->hasExternalSubset == 0) &&
6572 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006574 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006575 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006576 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006577 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006578 if ((ctxt->inSubset == 0) &&
6579 (ctxt->sax != NULL) &&
6580 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006581 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006582 }
Owen Taylor3473f882001-02-23 17:55:21 +00006583 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006584 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006585 }
6586
6587 /*
6588 * [ WFC: Parsed Entity ]
6589 * An entity reference must not contain the name of an
6590 * unparsed entity
6591 */
6592 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006593 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006594 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006595 }
6596
6597 /*
6598 * [ WFC: No External Entity References ]
6599 * Attribute values cannot contain direct or indirect
6600 * entity references to external entities.
6601 */
6602 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6603 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006604 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6605 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006606 }
6607 /*
6608 * [ WFC: No < in Attribute Values ]
6609 * The replacement text of any entity referred to directly or
6610 * indirectly in an attribute value (other than "&lt;") must
6611 * not contain a <.
6612 */
6613 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6614 (ent != NULL) &&
6615 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6616 (ent->content != NULL) &&
6617 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006618 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006619 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006620 }
6621
6622 /*
6623 * Internal check, no parameter entities here ...
6624 */
6625 else {
6626 switch (ent->etype) {
6627 case XML_INTERNAL_PARAMETER_ENTITY:
6628 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006629 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6630 "Attempt to reference the parameter entity '%s'\n",
6631 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006632 break;
6633 default:
6634 break;
6635 }
6636 }
6637
6638 /*
6639 * [ WFC: No Recursion ]
6640 * A parsed entity must not contain a recursive reference
6641 * to itself, either directly or indirectly.
6642 * Done somewhere else
6643 */
6644
6645 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006646 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 }
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
6649 }
6650 return(ent);
6651}
6652
6653/**
6654 * xmlParseStringEntityRef:
6655 * @ctxt: an XML parser context
6656 * @str: a pointer to an index in the string
6657 *
6658 * parse ENTITY references declarations, but this version parses it from
6659 * a string value.
6660 *
6661 * [68] EntityRef ::= '&' Name ';'
6662 *
6663 * [ WFC: Entity Declared ]
6664 * In a document without any DTD, a document with only an internal DTD
6665 * subset which contains no parameter entity references, or a document
6666 * with "standalone='yes'", the Name given in the entity reference
6667 * must match that in an entity declaration, except that well-formed
6668 * documents need not declare any of the following entities: amp, lt,
6669 * gt, apos, quot. The declaration of a parameter entity must precede
6670 * any reference to it. Similarly, the declaration of a general entity
6671 * must precede any reference to it which appears in a default value in an
6672 * attribute-list declaration. Note that if entities are declared in the
6673 * external subset or in external parameter entities, a non-validating
6674 * processor is not obligated to read and process their declarations;
6675 * for such documents, the rule that an entity must be declared is a
6676 * well-formedness constraint only if standalone='yes'.
6677 *
6678 * [ WFC: Parsed Entity ]
6679 * An entity reference must not contain the name of an unparsed entity
6680 *
6681 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6682 * is updated to the current location in the string.
6683 */
6684xmlEntityPtr
6685xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6686 xmlChar *name;
6687 const xmlChar *ptr;
6688 xmlChar cur;
6689 xmlEntityPtr ent = NULL;
6690
6691 if ((str == NULL) || (*str == NULL))
6692 return(NULL);
6693 ptr = *str;
6694 cur = *ptr;
6695 if (cur == '&') {
6696 ptr++;
6697 cur = *ptr;
6698 name = xmlParseStringName(ctxt, &ptr);
6699 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006700 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6701 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006702 } else {
6703 if (*ptr == ';') {
6704 ptr++;
6705 /*
6706 * Ask first SAX for entity resolution, otherwise try the
6707 * predefined set.
6708 */
6709 if (ctxt->sax != NULL) {
6710 if (ctxt->sax->getEntity != NULL)
6711 ent = ctxt->sax->getEntity(ctxt->userData, name);
6712 if (ent == NULL)
6713 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006714 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006715 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006716 }
Owen Taylor3473f882001-02-23 17:55:21 +00006717 }
6718 /*
6719 * [ WFC: Entity Declared ]
6720 * In a document without any DTD, a document with only an
6721 * internal DTD subset which contains no parameter entity
6722 * references, or a document with "standalone='yes'", the
6723 * Name given in the entity reference must match that in an
6724 * entity declaration, except that well-formed documents
6725 * need not declare any of the following entities: amp, lt,
6726 * gt, apos, quot.
6727 * The declaration of a parameter entity must precede any
6728 * reference to it.
6729 * Similarly, the declaration of a general entity must
6730 * precede any reference to it which appears in a default
6731 * value in an attribute-list declaration. Note that if
6732 * entities are declared in the external subset or in
6733 * external parameter entities, a non-validating processor
6734 * is not obligated to read and process their declarations;
6735 * for such documents, the rule that an entity must be
6736 * declared is a well-formedness constraint only if
6737 * standalone='yes'.
6738 */
6739 if (ent == NULL) {
6740 if ((ctxt->standalone == 1) ||
6741 ((ctxt->hasExternalSubset == 0) &&
6742 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006743 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006744 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006745 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006746 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006747 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006748 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006749 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006750 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006751 }
6752
6753 /*
6754 * [ WFC: Parsed Entity ]
6755 * An entity reference must not contain the name of an
6756 * unparsed entity
6757 */
6758 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006759 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006760 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006761 }
6762
6763 /*
6764 * [ WFC: No External Entity References ]
6765 * Attribute values cannot contain direct or indirect
6766 * entity references to external entities.
6767 */
6768 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6769 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006770 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006771 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006772 }
6773 /*
6774 * [ WFC: No < in Attribute Values ]
6775 * The replacement text of any entity referred to directly or
6776 * indirectly in an attribute value (other than "&lt;") must
6777 * not contain a <.
6778 */
6779 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6780 (ent != NULL) &&
6781 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6782 (ent->content != NULL) &&
6783 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006784 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6785 "'<' in entity '%s' is not allowed in attributes values\n",
6786 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006787 }
6788
6789 /*
6790 * Internal check, no parameter entities here ...
6791 */
6792 else {
6793 switch (ent->etype) {
6794 case XML_INTERNAL_PARAMETER_ENTITY:
6795 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006796 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6797 "Attempt to reference the parameter entity '%s'\n",
6798 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006799 break;
6800 default:
6801 break;
6802 }
6803 }
6804
6805 /*
6806 * [ WFC: No Recursion ]
6807 * A parsed entity must not contain a recursive reference
6808 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006809 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006810 */
6811
6812 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006813 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006814 }
6815 xmlFree(name);
6816 }
6817 }
6818 *str = ptr;
6819 return(ent);
6820}
6821
6822/**
6823 * xmlParsePEReference:
6824 * @ctxt: an XML parser context
6825 *
6826 * parse PEReference declarations
6827 * The entity content is handled directly by pushing it's content as
6828 * a new input stream.
6829 *
6830 * [69] PEReference ::= '%' Name ';'
6831 *
6832 * [ WFC: No Recursion ]
6833 * A parsed entity must not contain a recursive
6834 * reference to itself, either directly or indirectly.
6835 *
6836 * [ WFC: Entity Declared ]
6837 * In a document without any DTD, a document with only an internal DTD
6838 * subset which contains no parameter entity references, or a document
6839 * with "standalone='yes'", ... ... The declaration of a parameter
6840 * entity must precede any reference to it...
6841 *
6842 * [ VC: Entity Declared ]
6843 * In a document with an external subset or external parameter entities
6844 * with "standalone='no'", ... ... The declaration of a parameter entity
6845 * must precede any reference to it...
6846 *
6847 * [ WFC: In DTD ]
6848 * Parameter-entity references may only appear in the DTD.
6849 * NOTE: misleading but this is handled.
6850 */
6851void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006852xmlParsePEReference(xmlParserCtxtPtr ctxt)
6853{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006854 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006855 xmlEntityPtr entity = NULL;
6856 xmlParserInputPtr input;
6857
6858 if (RAW == '%') {
6859 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006860 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006861 if (name == NULL) {
6862 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6863 "xmlParsePEReference: no name\n");
6864 } else {
6865 if (RAW == ';') {
6866 NEXT;
6867 if ((ctxt->sax != NULL) &&
6868 (ctxt->sax->getParameterEntity != NULL))
6869 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6870 name);
6871 if (entity == NULL) {
6872 /*
6873 * [ WFC: Entity Declared ]
6874 * In a document without any DTD, a document with only an
6875 * internal DTD subset which contains no parameter entity
6876 * references, or a document with "standalone='yes'", ...
6877 * ... The declaration of a parameter entity must precede
6878 * any reference to it...
6879 */
6880 if ((ctxt->standalone == 1) ||
6881 ((ctxt->hasExternalSubset == 0) &&
6882 (ctxt->hasPErefs == 0))) {
6883 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6884 "PEReference: %%%s; not found\n",
6885 name);
6886 } else {
6887 /*
6888 * [ VC: Entity Declared ]
6889 * In a document with an external subset or external
6890 * parameter entities with "standalone='no'", ...
6891 * ... The declaration of a parameter entity must
6892 * precede any reference to it...
6893 */
6894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6895 "PEReference: %%%s; not found\n",
6896 name, NULL);
6897 ctxt->valid = 0;
6898 }
6899 } else {
6900 /*
6901 * Internal checking in case the entity quest barfed
6902 */
6903 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6904 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6905 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6906 "Internal: %%%s; is not a parameter entity\n",
6907 name, NULL);
6908 } else if (ctxt->input->free != deallocblankswrapper) {
6909 input =
6910 xmlNewBlanksWrapperInputStream(ctxt, entity);
6911 xmlPushInput(ctxt, input);
6912 } else {
6913 /*
6914 * TODO !!!
6915 * handle the extra spaces added before and after
6916 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6917 */
6918 input = xmlNewEntityInputStream(ctxt, entity);
6919 xmlPushInput(ctxt, input);
6920 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006921 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006922 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006923 xmlParseTextDecl(ctxt);
6924 if (ctxt->errNo ==
6925 XML_ERR_UNSUPPORTED_ENCODING) {
6926 /*
6927 * The XML REC instructs us to stop parsing
6928 * right here
6929 */
6930 ctxt->instate = XML_PARSER_EOF;
6931 return;
6932 }
6933 }
6934 }
6935 }
6936 ctxt->hasPErefs = 1;
6937 } else {
6938 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6939 }
6940 }
Owen Taylor3473f882001-02-23 17:55:21 +00006941 }
6942}
6943
6944/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00006945 * xmlLoadEntityContent:
6946 * @ctxt: an XML parser context
6947 * @entity: an unloaded system entity
6948 *
6949 * Load the original content of the given system entity from the
6950 * ExternalID/SystemID given. This is to be used for Included in Literal
6951 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
6952 *
6953 * Returns 0 in case of success and -1 in case of failure
6954 */
6955static int
6956xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
6957 xmlParserInputPtr input;
6958 xmlBufferPtr buf;
6959 int l, c;
6960 int count = 0;
6961
6962 if ((ctxt == NULL) || (entity == NULL) ||
6963 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
6964 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
6965 (entity->content != NULL)) {
6966 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6967 "xmlLoadEntityContent parameter error");
6968 return(-1);
6969 }
6970
6971 if (xmlParserDebugEntities)
6972 xmlGenericError(xmlGenericErrorContext,
6973 "Reading %s entity content input\n", entity->name);
6974
6975 buf = xmlBufferCreate();
6976 if (buf == NULL) {
6977 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6978 "xmlLoadEntityContent parameter error");
6979 return(-1);
6980 }
6981
6982 input = xmlNewEntityInputStream(ctxt, entity);
6983 if (input == NULL) {
6984 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6985 "xmlLoadEntityContent input error");
6986 xmlBufferFree(buf);
6987 return(-1);
6988 }
6989
6990 /*
6991 * Push the entity as the current input, read char by char
6992 * saving to the buffer until the end of the entity or an error
6993 */
6994 xmlPushInput(ctxt, input);
6995 GROW;
6996 c = CUR_CHAR(l);
6997 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
6998 (IS_CHAR(c))) {
6999 xmlBufferAdd(buf, ctxt->input->cur, l);
7000 if (count++ > 100) {
7001 count = 0;
7002 GROW;
7003 }
7004 NEXTL(l);
7005 c = CUR_CHAR(l);
7006 }
7007
7008 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7009 xmlPopInput(ctxt);
7010 } else if (!IS_CHAR(c)) {
7011 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7012 "xmlLoadEntityContent: invalid char value %d\n",
7013 c);
7014 xmlBufferFree(buf);
7015 return(-1);
7016 }
7017 entity->content = buf->content;
7018 buf->content = NULL;
7019 xmlBufferFree(buf);
7020
7021 return(0);
7022}
7023
7024/**
Owen Taylor3473f882001-02-23 17:55:21 +00007025 * xmlParseStringPEReference:
7026 * @ctxt: an XML parser context
7027 * @str: a pointer to an index in the string
7028 *
7029 * parse PEReference declarations
7030 *
7031 * [69] PEReference ::= '%' Name ';'
7032 *
7033 * [ WFC: No Recursion ]
7034 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007035 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007036 *
7037 * [ WFC: Entity Declared ]
7038 * In a document without any DTD, a document with only an internal DTD
7039 * subset which contains no parameter entity references, or a document
7040 * with "standalone='yes'", ... ... The declaration of a parameter
7041 * entity must precede any reference to it...
7042 *
7043 * [ VC: Entity Declared ]
7044 * In a document with an external subset or external parameter entities
7045 * with "standalone='no'", ... ... The declaration of a parameter entity
7046 * must precede any reference to it...
7047 *
7048 * [ WFC: In DTD ]
7049 * Parameter-entity references may only appear in the DTD.
7050 * NOTE: misleading but this is handled.
7051 *
7052 * Returns the string of the entity content.
7053 * str is updated to the current value of the index
7054 */
7055xmlEntityPtr
7056xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7057 const xmlChar *ptr;
7058 xmlChar cur;
7059 xmlChar *name;
7060 xmlEntityPtr entity = NULL;
7061
7062 if ((str == NULL) || (*str == NULL)) return(NULL);
7063 ptr = *str;
7064 cur = *ptr;
7065 if (cur == '%') {
7066 ptr++;
7067 cur = *ptr;
7068 name = xmlParseStringName(ctxt, &ptr);
7069 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007070 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7071 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007072 } else {
7073 cur = *ptr;
7074 if (cur == ';') {
7075 ptr++;
7076 cur = *ptr;
7077 if ((ctxt->sax != NULL) &&
7078 (ctxt->sax->getParameterEntity != NULL))
7079 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7080 name);
7081 if (entity == NULL) {
7082 /*
7083 * [ WFC: Entity Declared ]
7084 * In a document without any DTD, a document with only an
7085 * internal DTD subset which contains no parameter entity
7086 * references, or a document with "standalone='yes'", ...
7087 * ... The declaration of a parameter entity must precede
7088 * any reference to it...
7089 */
7090 if ((ctxt->standalone == 1) ||
7091 ((ctxt->hasExternalSubset == 0) &&
7092 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007093 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007094 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007095 } else {
7096 /*
7097 * [ VC: Entity Declared ]
7098 * In a document with an external subset or external
7099 * parameter entities with "standalone='no'", ...
7100 * ... The declaration of a parameter entity must
7101 * precede any reference to it...
7102 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007103 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7104 "PEReference: %%%s; not found\n",
7105 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007106 ctxt->valid = 0;
7107 }
7108 } else {
7109 /*
7110 * Internal checking in case the entity quest barfed
7111 */
7112 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7113 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007114 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7115 "%%%s; is not a parameter entity\n",
7116 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007117 }
7118 }
7119 ctxt->hasPErefs = 1;
7120 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007121 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007122 }
7123 xmlFree(name);
7124 }
7125 }
7126 *str = ptr;
7127 return(entity);
7128}
7129
7130/**
7131 * xmlParseDocTypeDecl:
7132 * @ctxt: an XML parser context
7133 *
7134 * parse a DOCTYPE declaration
7135 *
7136 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7137 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7138 *
7139 * [ VC: Root Element Type ]
7140 * The Name in the document type declaration must match the element
7141 * type of the root element.
7142 */
7143
7144void
7145xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007146 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007147 xmlChar *ExternalID = NULL;
7148 xmlChar *URI = NULL;
7149
7150 /*
7151 * We know that '<!DOCTYPE' has been detected.
7152 */
7153 SKIP(9);
7154
7155 SKIP_BLANKS;
7156
7157 /*
7158 * Parse the DOCTYPE name.
7159 */
7160 name = xmlParseName(ctxt);
7161 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007162 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7163 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007164 }
7165 ctxt->intSubName = name;
7166
7167 SKIP_BLANKS;
7168
7169 /*
7170 * Check for SystemID and ExternalID
7171 */
7172 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7173
7174 if ((URI != NULL) || (ExternalID != NULL)) {
7175 ctxt->hasExternalSubset = 1;
7176 }
7177 ctxt->extSubURI = URI;
7178 ctxt->extSubSystem = ExternalID;
7179
7180 SKIP_BLANKS;
7181
7182 /*
7183 * Create and update the internal subset.
7184 */
7185 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7186 (!ctxt->disableSAX))
7187 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7188
7189 /*
7190 * Is there any internal subset declarations ?
7191 * they are handled separately in xmlParseInternalSubset()
7192 */
7193 if (RAW == '[')
7194 return;
7195
7196 /*
7197 * We should be at the end of the DOCTYPE declaration.
7198 */
7199 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007200 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007201 }
7202 NEXT;
7203}
7204
7205/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007206 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007207 * @ctxt: an XML parser context
7208 *
7209 * parse the internal subset declaration
7210 *
7211 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7212 */
7213
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007214static void
Owen Taylor3473f882001-02-23 17:55:21 +00007215xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7216 /*
7217 * Is there any DTD definition ?
7218 */
7219 if (RAW == '[') {
7220 ctxt->instate = XML_PARSER_DTD;
7221 NEXT;
7222 /*
7223 * Parse the succession of Markup declarations and
7224 * PEReferences.
7225 * Subsequence (markupdecl | PEReference | S)*
7226 */
7227 while (RAW != ']') {
7228 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007229 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007230
7231 SKIP_BLANKS;
7232 xmlParseMarkupDecl(ctxt);
7233 xmlParsePEReference(ctxt);
7234
7235 /*
7236 * Pop-up of finished entities.
7237 */
7238 while ((RAW == 0) && (ctxt->inputNr > 1))
7239 xmlPopInput(ctxt);
7240
7241 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007242 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007243 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007244 break;
7245 }
7246 }
7247 if (RAW == ']') {
7248 NEXT;
7249 SKIP_BLANKS;
7250 }
7251 }
7252
7253 /*
7254 * We should be at the end of the DOCTYPE declaration.
7255 */
7256 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007257 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007258 }
7259 NEXT;
7260}
7261
Daniel Veillard81273902003-09-30 00:43:48 +00007262#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007263/**
7264 * xmlParseAttribute:
7265 * @ctxt: an XML parser context
7266 * @value: a xmlChar ** used to store the value of the attribute
7267 *
7268 * parse an attribute
7269 *
7270 * [41] Attribute ::= Name Eq AttValue
7271 *
7272 * [ WFC: No External Entity References ]
7273 * Attribute values cannot contain direct or indirect entity references
7274 * to external entities.
7275 *
7276 * [ WFC: No < in Attribute Values ]
7277 * The replacement text of any entity referred to directly or indirectly in
7278 * an attribute value (other than "&lt;") must not contain a <.
7279 *
7280 * [ VC: Attribute Value Type ]
7281 * The attribute must have been declared; the value must be of the type
7282 * declared for it.
7283 *
7284 * [25] Eq ::= S? '=' S?
7285 *
7286 * With namespace:
7287 *
7288 * [NS 11] Attribute ::= QName Eq AttValue
7289 *
7290 * Also the case QName == xmlns:??? is handled independently as a namespace
7291 * definition.
7292 *
7293 * Returns the attribute name, and the value in *value.
7294 */
7295
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007296const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007297xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007298 const xmlChar *name;
7299 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007300
7301 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007302 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007303 name = xmlParseName(ctxt);
7304 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007305 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007306 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007307 return(NULL);
7308 }
7309
7310 /*
7311 * read the value
7312 */
7313 SKIP_BLANKS;
7314 if (RAW == '=') {
7315 NEXT;
7316 SKIP_BLANKS;
7317 val = xmlParseAttValue(ctxt);
7318 ctxt->instate = XML_PARSER_CONTENT;
7319 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007320 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007321 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007322 return(NULL);
7323 }
7324
7325 /*
7326 * Check that xml:lang conforms to the specification
7327 * No more registered as an error, just generate a warning now
7328 * since this was deprecated in XML second edition
7329 */
7330 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7331 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007332 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7333 "Malformed value for xml:lang : %s\n",
7334 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007335 }
7336 }
7337
7338 /*
7339 * Check that xml:space conforms to the specification
7340 */
7341 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7342 if (xmlStrEqual(val, BAD_CAST "default"))
7343 *(ctxt->space) = 0;
7344 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7345 *(ctxt->space) = 1;
7346 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007347 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007348"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007349 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007350 }
7351 }
7352
7353 *value = val;
7354 return(name);
7355}
7356
7357/**
7358 * xmlParseStartTag:
7359 * @ctxt: an XML parser context
7360 *
7361 * parse a start of tag either for rule element or
7362 * EmptyElement. In both case we don't parse the tag closing chars.
7363 *
7364 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7365 *
7366 * [ WFC: Unique Att Spec ]
7367 * No attribute name may appear more than once in the same start-tag or
7368 * empty-element tag.
7369 *
7370 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7371 *
7372 * [ WFC: Unique Att Spec ]
7373 * No attribute name may appear more than once in the same start-tag or
7374 * empty-element tag.
7375 *
7376 * With namespace:
7377 *
7378 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7379 *
7380 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7381 *
7382 * Returns the element name parsed
7383 */
7384
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007385const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007386xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007387 const xmlChar *name;
7388 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007389 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007390 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007391 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007392 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007393 int i;
7394
7395 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007396 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007397
7398 name = xmlParseName(ctxt);
7399 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007400 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007401 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007402 return(NULL);
7403 }
7404
7405 /*
7406 * Now parse the attributes, it ends up with the ending
7407 *
7408 * (S Attribute)* S?
7409 */
7410 SKIP_BLANKS;
7411 GROW;
7412
Daniel Veillard21a0f912001-02-25 19:54:14 +00007413 while ((RAW != '>') &&
7414 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007415 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007416 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007417 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007418
7419 attname = xmlParseAttribute(ctxt, &attvalue);
7420 if ((attname != NULL) && (attvalue != NULL)) {
7421 /*
7422 * [ WFC: Unique Att Spec ]
7423 * No attribute name may appear more than once in the same
7424 * start-tag or empty-element tag.
7425 */
7426 for (i = 0; i < nbatts;i += 2) {
7427 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007428 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007429 xmlFree(attvalue);
7430 goto failed;
7431 }
7432 }
Owen Taylor3473f882001-02-23 17:55:21 +00007433 /*
7434 * Add the pair to atts
7435 */
7436 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007437 maxatts = 22; /* allow for 10 attrs by default */
7438 atts = (const xmlChar **)
7439 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007440 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007441 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007442 if (attvalue != NULL)
7443 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007444 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007445 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007446 ctxt->atts = atts;
7447 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007448 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007449 const xmlChar **n;
7450
Owen Taylor3473f882001-02-23 17:55:21 +00007451 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007452 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007453 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007454 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007455 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007456 if (attvalue != NULL)
7457 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007458 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007459 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007460 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007461 ctxt->atts = atts;
7462 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007463 }
7464 atts[nbatts++] = attname;
7465 atts[nbatts++] = attvalue;
7466 atts[nbatts] = NULL;
7467 atts[nbatts + 1] = NULL;
7468 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007469 if (attvalue != NULL)
7470 xmlFree(attvalue);
7471 }
7472
7473failed:
7474
Daniel Veillard3772de32002-12-17 10:31:45 +00007475 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007476 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7477 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007478 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007479 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7480 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007481 }
7482 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007483 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7484 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007485 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7486 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007487 break;
7488 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007489 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007490 GROW;
7491 }
7492
7493 /*
7494 * SAX: Start of Element !
7495 */
7496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007497 (!ctxt->disableSAX)) {
7498 if (nbatts > 0)
7499 ctxt->sax->startElement(ctxt->userData, name, atts);
7500 else
7501 ctxt->sax->startElement(ctxt->userData, name, NULL);
7502 }
Owen Taylor3473f882001-02-23 17:55:21 +00007503
7504 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007505 /* Free only the content strings */
7506 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007507 if (atts[i] != NULL)
7508 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007509 }
7510 return(name);
7511}
7512
7513/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007514 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007515 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 * @line: line of the start tag
7517 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007518 *
7519 * parse an end of tag
7520 *
7521 * [42] ETag ::= '</' Name S? '>'
7522 *
7523 * With namespace
7524 *
7525 * [NS 9] ETag ::= '</' QName S? '>'
7526 */
7527
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007528static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007529xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007530 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007531
7532 GROW;
7533 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007534 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007535 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007536 return;
7537 }
7538 SKIP(2);
7539
Daniel Veillard46de64e2002-05-29 08:21:33 +00007540 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007541
7542 /*
7543 * We should definitely be at the ending "S? '>'" part
7544 */
7545 GROW;
7546 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007547 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007548 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007549 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007550 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007551
7552 /*
7553 * [ WFC: Element Type Match ]
7554 * The Name in an element's end-tag must match the element type in the
7555 * start-tag.
7556 *
7557 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007558 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007559 if (name == NULL) name = BAD_CAST "unparseable";
7560 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007561 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007562 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007563 }
7564
7565 /*
7566 * SAX: End of Tag
7567 */
7568 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7569 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007570 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007571
Daniel Veillarde57ec792003-09-10 10:50:59 +00007572 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007573 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007574 return;
7575}
7576
7577/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007578 * xmlParseEndTag:
7579 * @ctxt: an XML parser context
7580 *
7581 * parse an end of tag
7582 *
7583 * [42] ETag ::= '</' Name S? '>'
7584 *
7585 * With namespace
7586 *
7587 * [NS 9] ETag ::= '</' QName S? '>'
7588 */
7589
7590void
7591xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007592 xmlParseEndTag1(ctxt, 0);
7593}
Daniel Veillard81273902003-09-30 00:43:48 +00007594#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007595
7596/************************************************************************
7597 * *
7598 * SAX 2 specific operations *
7599 * *
7600 ************************************************************************/
7601
7602static const xmlChar *
7603xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7604 int len = 0, l;
7605 int c;
7606 int count = 0;
7607
7608 /*
7609 * Handler for more complex cases
7610 */
7611 GROW;
7612 c = CUR_CHAR(l);
7613 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007614 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007615 return(NULL);
7616 }
7617
7618 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007619 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007620 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007621 (IS_COMBINING(c)) ||
7622 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007623 if (count++ > 100) {
7624 count = 0;
7625 GROW;
7626 }
7627 len += l;
7628 NEXTL(l);
7629 c = CUR_CHAR(l);
7630 }
7631 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7632}
7633
7634/*
7635 * xmlGetNamespace:
7636 * @ctxt: an XML parser context
7637 * @prefix: the prefix to lookup
7638 *
7639 * Lookup the namespace name for the @prefix (which ca be NULL)
7640 * The prefix must come from the @ctxt->dict dictionnary
7641 *
7642 * Returns the namespace name or NULL if not bound
7643 */
7644static const xmlChar *
7645xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7646 int i;
7647
Daniel Veillarde57ec792003-09-10 10:50:59 +00007648 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007649 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007650 if (ctxt->nsTab[i] == prefix) {
7651 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7652 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007654 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007655 return(NULL);
7656}
7657
7658/**
7659 * xmlParseNCName:
7660 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007661 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007662 *
7663 * parse an XML name.
7664 *
7665 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7666 * CombiningChar | Extender
7667 *
7668 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7669 *
7670 * Returns the Name parsed or NULL
7671 */
7672
7673static const xmlChar *
7674xmlParseNCName(xmlParserCtxtPtr ctxt) {
7675 const xmlChar *in;
7676 const xmlChar *ret;
7677 int count = 0;
7678
7679 /*
7680 * Accelerator for simple ASCII names
7681 */
7682 in = ctxt->input->cur;
7683 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7684 ((*in >= 0x41) && (*in <= 0x5A)) ||
7685 (*in == '_')) {
7686 in++;
7687 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7688 ((*in >= 0x41) && (*in <= 0x5A)) ||
7689 ((*in >= 0x30) && (*in <= 0x39)) ||
7690 (*in == '_') || (*in == '-') ||
7691 (*in == '.'))
7692 in++;
7693 if ((*in > 0) && (*in < 0x80)) {
7694 count = in - ctxt->input->cur;
7695 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7696 ctxt->input->cur = in;
7697 ctxt->nbChars += count;
7698 ctxt->input->col += count;
7699 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007700 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 }
7702 return(ret);
7703 }
7704 }
7705 return(xmlParseNCNameComplex(ctxt));
7706}
7707
7708/**
7709 * xmlParseQName:
7710 * @ctxt: an XML parser context
7711 * @prefix: pointer to store the prefix part
7712 *
7713 * parse an XML Namespace QName
7714 *
7715 * [6] QName ::= (Prefix ':')? LocalPart
7716 * [7] Prefix ::= NCName
7717 * [8] LocalPart ::= NCName
7718 *
7719 * Returns the Name parsed or NULL
7720 */
7721
7722static const xmlChar *
7723xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7724 const xmlChar *l, *p;
7725
7726 GROW;
7727
7728 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007729 if (l == NULL) {
7730 if (CUR == ':') {
7731 l = xmlParseName(ctxt);
7732 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7734 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007735 *prefix = NULL;
7736 return(l);
7737 }
7738 }
7739 return(NULL);
7740 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007741 if (CUR == ':') {
7742 NEXT;
7743 p = l;
7744 l = xmlParseNCName(ctxt);
7745 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007746 xmlChar *tmp;
7747
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007748 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7749 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007750 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7751 p = xmlDictLookup(ctxt->dict, tmp, -1);
7752 if (tmp != NULL) xmlFree(tmp);
7753 *prefix = NULL;
7754 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007755 }
7756 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007757 xmlChar *tmp;
7758
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007759 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7760 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007761 NEXT;
7762 tmp = (xmlChar *) xmlParseName(ctxt);
7763 if (tmp != NULL) {
7764 tmp = xmlBuildQName(tmp, l, NULL, 0);
7765 l = xmlDictLookup(ctxt->dict, tmp, -1);
7766 if (tmp != NULL) xmlFree(tmp);
7767 *prefix = p;
7768 return(l);
7769 }
7770 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7771 l = xmlDictLookup(ctxt->dict, tmp, -1);
7772 if (tmp != NULL) xmlFree(tmp);
7773 *prefix = p;
7774 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007775 }
7776 *prefix = p;
7777 } else
7778 *prefix = NULL;
7779 return(l);
7780}
7781
7782/**
7783 * xmlParseQNameAndCompare:
7784 * @ctxt: an XML parser context
7785 * @name: the localname
7786 * @prefix: the prefix, if any.
7787 *
7788 * parse an XML name and compares for match
7789 * (specialized for endtag parsing)
7790 *
7791 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7792 * and the name for mismatch
7793 */
7794
7795static const xmlChar *
7796xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7797 xmlChar const *prefix) {
7798 const xmlChar *cmp = name;
7799 const xmlChar *in;
7800 const xmlChar *ret;
7801 const xmlChar *prefix2;
7802
7803 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7804
7805 GROW;
7806 in = ctxt->input->cur;
7807
7808 cmp = prefix;
7809 while (*in != 0 && *in == *cmp) {
7810 ++in;
7811 ++cmp;
7812 }
7813 if ((*cmp == 0) && (*in == ':')) {
7814 in++;
7815 cmp = name;
7816 while (*in != 0 && *in == *cmp) {
7817 ++in;
7818 ++cmp;
7819 }
William M. Brack76e95df2003-10-18 16:20:14 +00007820 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821 /* success */
7822 ctxt->input->cur = in;
7823 return((const xmlChar*) 1);
7824 }
7825 }
7826 /*
7827 * all strings coms from the dictionary, equality can be done directly
7828 */
7829 ret = xmlParseQName (ctxt, &prefix2);
7830 if ((ret == name) && (prefix == prefix2))
7831 return((const xmlChar*) 1);
7832 return ret;
7833}
7834
7835/**
7836 * xmlParseAttValueInternal:
7837 * @ctxt: an XML parser context
7838 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007839 * @alloc: whether the attribute was reallocated as a new string
7840 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007841 *
7842 * parse a value for an attribute.
7843 * NOTE: if no normalization is needed, the routine will return pointers
7844 * directly from the data buffer.
7845 *
7846 * 3.3.3 Attribute-Value Normalization:
7847 * Before the value of an attribute is passed to the application or
7848 * checked for validity, the XML processor must normalize it as follows:
7849 * - a character reference is processed by appending the referenced
7850 * character to the attribute value
7851 * - an entity reference is processed by recursively processing the
7852 * replacement text of the entity
7853 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7854 * appending #x20 to the normalized value, except that only a single
7855 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7856 * parsed entity or the literal entity value of an internal parsed entity
7857 * - other characters are processed by appending them to the normalized value
7858 * If the declared value is not CDATA, then the XML processor must further
7859 * process the normalized attribute value by discarding any leading and
7860 * trailing space (#x20) characters, and by replacing sequences of space
7861 * (#x20) characters by a single space (#x20) character.
7862 * All attributes for which no declaration has been read should be treated
7863 * by a non-validating parser as if declared CDATA.
7864 *
7865 * Returns the AttValue parsed or NULL. The value has to be freed by the
7866 * caller if it was copied, this can be detected by val[*len] == 0.
7867 */
7868
7869static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007870xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7871 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007872{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007873 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007874 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007875 xmlChar *ret = NULL;
7876
7877 GROW;
7878 in = (xmlChar *) CUR_PTR;
7879 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007880 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007881 return (NULL);
7882 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007883 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007884
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007885 /*
7886 * try to handle in this routine the most common case where no
7887 * allocation of a new string is required and where content is
7888 * pure ASCII.
7889 */
7890 limit = *in++;
7891 end = ctxt->input->end;
7892 start = in;
7893 if (in >= end) {
7894 const xmlChar *oldbase = ctxt->input->base;
7895 GROW;
7896 if (oldbase != ctxt->input->base) {
7897 long delta = ctxt->input->base - oldbase;
7898 start = start + delta;
7899 in = in + delta;
7900 }
7901 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007903 if (normalize) {
7904 /*
7905 * Skip any leading spaces
7906 */
7907 while ((in < end) && (*in != limit) &&
7908 ((*in == 0x20) || (*in == 0x9) ||
7909 (*in == 0xA) || (*in == 0xD))) {
7910 in++;
7911 start = in;
7912 if (in >= end) {
7913 const xmlChar *oldbase = ctxt->input->base;
7914 GROW;
7915 if (oldbase != ctxt->input->base) {
7916 long delta = ctxt->input->base - oldbase;
7917 start = start + delta;
7918 in = in + delta;
7919 }
7920 end = ctxt->input->end;
7921 }
7922 }
7923 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7924 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7925 if ((*in++ == 0x20) && (*in == 0x20)) break;
7926 if (in >= end) {
7927 const xmlChar *oldbase = ctxt->input->base;
7928 GROW;
7929 if (oldbase != ctxt->input->base) {
7930 long delta = ctxt->input->base - oldbase;
7931 start = start + delta;
7932 in = in + delta;
7933 }
7934 end = ctxt->input->end;
7935 }
7936 }
7937 last = in;
7938 /*
7939 * skip the trailing blanks
7940 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007941 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007942 while ((in < end) && (*in != limit) &&
7943 ((*in == 0x20) || (*in == 0x9) ||
7944 (*in == 0xA) || (*in == 0xD))) {
7945 in++;
7946 if (in >= end) {
7947 const xmlChar *oldbase = ctxt->input->base;
7948 GROW;
7949 if (oldbase != ctxt->input->base) {
7950 long delta = ctxt->input->base - oldbase;
7951 start = start + delta;
7952 in = in + delta;
7953 last = last + delta;
7954 }
7955 end = ctxt->input->end;
7956 }
7957 }
7958 if (*in != limit) goto need_complex;
7959 } else {
7960 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7961 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7962 in++;
7963 if (in >= end) {
7964 const xmlChar *oldbase = ctxt->input->base;
7965 GROW;
7966 if (oldbase != ctxt->input->base) {
7967 long delta = ctxt->input->base - oldbase;
7968 start = start + delta;
7969 in = in + delta;
7970 }
7971 end = ctxt->input->end;
7972 }
7973 }
7974 last = in;
7975 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007976 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007977 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007978 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007979 *len = last - start;
7980 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007981 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007982 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007983 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984 }
7985 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007986 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007988need_complex:
7989 if (alloc) *alloc = 1;
7990 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007991}
7992
7993/**
7994 * xmlParseAttribute2:
7995 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007996 * @pref: the element prefix
7997 * @elem: the element name
7998 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007999 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008000 * @len: an int * to save the length of the attribute
8001 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008002 *
8003 * parse an attribute in the new SAX2 framework.
8004 *
8005 * Returns the attribute name, and the value in *value, .
8006 */
8007
8008static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008009xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008010 const xmlChar * pref, const xmlChar * elem,
8011 const xmlChar ** prefix, xmlChar ** value,
8012 int *len, int *alloc)
8013{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008014 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008015 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008016 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017
8018 *value = NULL;
8019 GROW;
8020 name = xmlParseQName(ctxt, prefix);
8021 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008022 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8023 "error parsing attribute name\n");
8024 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008025 }
8026
8027 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008028 * get the type if needed
8029 */
8030 if (ctxt->attsSpecial != NULL) {
8031 int type;
8032
8033 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008034 pref, elem, *prefix, name);
8035 if (type != 0)
8036 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008037 }
8038
8039 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008040 * read the value
8041 */
8042 SKIP_BLANKS;
8043 if (RAW == '=') {
8044 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008045 SKIP_BLANKS;
8046 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8047 if (normalize) {
8048 /*
8049 * Sometimes a second normalisation pass for spaces is needed
8050 * but that only happens if charrefs or entities refernces
8051 * have been used in the attribute value, i.e. the attribute
8052 * value have been extracted in an allocated string already.
8053 */
8054 if (*alloc) {
8055 const xmlChar *val2;
8056
8057 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8058 if (val2 != NULL) {
8059 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008060 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008061 }
8062 }
8063 }
8064 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008065 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008066 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8067 "Specification mandate value for attribute %s\n",
8068 name);
8069 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008070 }
8071
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008072 if (*prefix == ctxt->str_xml) {
8073 /*
8074 * Check that xml:lang conforms to the specification
8075 * No more registered as an error, just generate a warning now
8076 * since this was deprecated in XML second edition
8077 */
8078 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8079 internal_val = xmlStrndup(val, *len);
8080 if (!xmlCheckLanguageID(internal_val)) {
8081 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8082 "Malformed value for xml:lang : %s\n",
8083 internal_val, NULL);
8084 }
8085 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008086
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008087 /*
8088 * Check that xml:space conforms to the specification
8089 */
8090 if (xmlStrEqual(name, BAD_CAST "space")) {
8091 internal_val = xmlStrndup(val, *len);
8092 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8093 *(ctxt->space) = 0;
8094 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8095 *(ctxt->space) = 1;
8096 else {
8097 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8098 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8099 internal_val, NULL);
8100 }
8101 }
8102 if (internal_val) {
8103 xmlFree(internal_val);
8104 }
8105 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008106
8107 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008108 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008109}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008110/**
8111 * xmlParseStartTag2:
8112 * @ctxt: an XML parser context
8113 *
8114 * parse a start of tag either for rule element or
8115 * EmptyElement. In both case we don't parse the tag closing chars.
8116 * This routine is called when running SAX2 parsing
8117 *
8118 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8119 *
8120 * [ WFC: Unique Att Spec ]
8121 * No attribute name may appear more than once in the same start-tag or
8122 * empty-element tag.
8123 *
8124 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8125 *
8126 * [ WFC: Unique Att Spec ]
8127 * No attribute name may appear more than once in the same start-tag or
8128 * empty-element tag.
8129 *
8130 * With namespace:
8131 *
8132 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8133 *
8134 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8135 *
8136 * Returns the element name parsed
8137 */
8138
8139static const xmlChar *
8140xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008141 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008142 const xmlChar *localname;
8143 const xmlChar *prefix;
8144 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008145 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008146 const xmlChar *nsname;
8147 xmlChar *attvalue;
8148 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008149 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008150 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008151 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008152 const xmlChar *base;
8153 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008154 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008155
8156 if (RAW != '<') return(NULL);
8157 NEXT1;
8158
8159 /*
8160 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8161 * point since the attribute values may be stored as pointers to
8162 * the buffer and calling SHRINK would destroy them !
8163 * The Shrinking is only possible once the full set of attribute
8164 * callbacks have been done.
8165 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008166reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008167 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008168 base = ctxt->input->base;
8169 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008170 oldline = ctxt->input->line;
8171 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008172 nbatts = 0;
8173 nratts = 0;
8174 nbdef = 0;
8175 nbNs = 0;
8176 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008177 /* Forget any namespaces added during an earlier parse of this element. */
8178 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179
8180 localname = xmlParseQName(ctxt, &prefix);
8181 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008182 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8183 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008184 return(NULL);
8185 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008186 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187
8188 /*
8189 * Now parse the attributes, it ends up with the ending
8190 *
8191 * (S Attribute)* S?
8192 */
8193 SKIP_BLANKS;
8194 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008195 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196
8197 while ((RAW != '>') &&
8198 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008199 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008200 const xmlChar *q = CUR_PTR;
8201 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008202 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008203
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008204 attname = xmlParseAttribute2(ctxt, prefix, localname,
8205 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008206 if (ctxt->input->base != base) {
8207 if ((attvalue != NULL) && (alloc != 0))
8208 xmlFree(attvalue);
8209 attvalue = NULL;
8210 goto base_changed;
8211 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008212 if ((attname != NULL) && (attvalue != NULL)) {
8213 if (len < 0) len = xmlStrlen(attvalue);
8214 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008215 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8216 xmlURIPtr uri;
8217
8218 if (*URL != 0) {
8219 uri = xmlParseURI((const char *) URL);
8220 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008221 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8222 "xmlns: %s not a valid URI\n",
8223 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008224 } else {
8225 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008226 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8227 "xmlns: URI %s is not absolute\n",
8228 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008229 }
8230 xmlFreeURI(uri);
8231 }
8232 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008234 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008235 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008236 for (j = 1;j <= nbNs;j++)
8237 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8238 break;
8239 if (j <= nbNs)
8240 xmlErrAttributeDup(ctxt, NULL, attname);
8241 else
8242 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008243 if (alloc != 0) xmlFree(attvalue);
8244 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008245 continue;
8246 }
8247 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008248 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8249 xmlURIPtr uri;
8250
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008251 if (attname == ctxt->str_xml) {
8252 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008253 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8254 "xml namespace prefix mapped to wrong URI\n",
8255 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008256 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008257 /*
8258 * Do not keep a namespace definition node
8259 */
8260 if (alloc != 0) xmlFree(attvalue);
8261 SKIP_BLANKS;
8262 continue;
8263 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008264 uri = xmlParseURI((const char *) URL);
8265 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008266 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8267 "xmlns:%s: '%s' is not a valid URI\n",
8268 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008269 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008270 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008271 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8272 "xmlns:%s: URI %s is not absolute\n",
8273 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008274 }
8275 xmlFreeURI(uri);
8276 }
8277
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008279 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008280 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008281 for (j = 1;j <= nbNs;j++)
8282 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8283 break;
8284 if (j <= nbNs)
8285 xmlErrAttributeDup(ctxt, aprefix, attname);
8286 else
8287 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008288 if (alloc != 0) xmlFree(attvalue);
8289 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008290 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008291 continue;
8292 }
8293
8294 /*
8295 * Add the pair to atts
8296 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008297 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8298 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008299 if (attvalue[len] == 0)
8300 xmlFree(attvalue);
8301 goto failed;
8302 }
8303 maxatts = ctxt->maxatts;
8304 atts = ctxt->atts;
8305 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008306 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008307 atts[nbatts++] = attname;
8308 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008309 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008310 atts[nbatts++] = attvalue;
8311 attvalue += len;
8312 atts[nbatts++] = attvalue;
8313 /*
8314 * tag if some deallocation is needed
8315 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008316 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008317 } else {
8318 if ((attvalue != NULL) && (attvalue[len] == 0))
8319 xmlFree(attvalue);
8320 }
8321
8322failed:
8323
8324 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008325 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008326 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8327 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008328 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8330 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008331 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008332 }
8333 SKIP_BLANKS;
8334 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8335 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008336 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008337 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008338 break;
8339 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008340 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008341 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008342 }
8343
Daniel Veillard0fb18932003-09-07 09:14:37 +00008344 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008345 * The attributes defaulting
8346 */
8347 if (ctxt->attsDefault != NULL) {
8348 xmlDefAttrsPtr defaults;
8349
8350 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8351 if (defaults != NULL) {
8352 for (i = 0;i < defaults->nbAttrs;i++) {
8353 attname = defaults->values[4 * i];
8354 aprefix = defaults->values[4 * i + 1];
8355
8356 /*
8357 * special work for namespaces defaulted defs
8358 */
8359 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8360 /*
8361 * check that it's not a defined namespace
8362 */
8363 for (j = 1;j <= nbNs;j++)
8364 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8365 break;
8366 if (j <= nbNs) continue;
8367
8368 nsname = xmlGetNamespace(ctxt, NULL);
8369 if (nsname != defaults->values[4 * i + 2]) {
8370 if (nsPush(ctxt, NULL,
8371 defaults->values[4 * i + 2]) > 0)
8372 nbNs++;
8373 }
8374 } else if (aprefix == ctxt->str_xmlns) {
8375 /*
8376 * check that it's not a defined namespace
8377 */
8378 for (j = 1;j <= nbNs;j++)
8379 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8380 break;
8381 if (j <= nbNs) continue;
8382
8383 nsname = xmlGetNamespace(ctxt, attname);
8384 if (nsname != defaults->values[2]) {
8385 if (nsPush(ctxt, attname,
8386 defaults->values[4 * i + 2]) > 0)
8387 nbNs++;
8388 }
8389 } else {
8390 /*
8391 * check that it's not a defined attribute
8392 */
8393 for (j = 0;j < nbatts;j+=5) {
8394 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8395 break;
8396 }
8397 if (j < nbatts) continue;
8398
8399 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8400 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008401 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008402 }
8403 maxatts = ctxt->maxatts;
8404 atts = ctxt->atts;
8405 }
8406 atts[nbatts++] = attname;
8407 atts[nbatts++] = aprefix;
8408 if (aprefix == NULL)
8409 atts[nbatts++] = NULL;
8410 else
8411 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8412 atts[nbatts++] = defaults->values[4 * i + 2];
8413 atts[nbatts++] = defaults->values[4 * i + 3];
8414 nbdef++;
8415 }
8416 }
8417 }
8418 }
8419
Daniel Veillarde70c8772003-11-25 07:21:18 +00008420 /*
8421 * The attributes checkings
8422 */
8423 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008424 /*
8425 * The default namespace does not apply to attribute names.
8426 */
8427 if (atts[i + 1] != NULL) {
8428 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8429 if (nsname == NULL) {
8430 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8431 "Namespace prefix %s for %s on %s is not defined\n",
8432 atts[i + 1], atts[i], localname);
8433 }
8434 atts[i + 2] = nsname;
8435 } else
8436 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008437 /*
8438 * [ WFC: Unique Att Spec ]
8439 * No attribute name may appear more than once in the same
8440 * start-tag or empty-element tag.
8441 * As extended by the Namespace in XML REC.
8442 */
8443 for (j = 0; j < i;j += 5) {
8444 if (atts[i] == atts[j]) {
8445 if (atts[i+1] == atts[j+1]) {
8446 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8447 break;
8448 }
8449 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8450 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8451 "Namespaced Attribute %s in '%s' redefined\n",
8452 atts[i], nsname, NULL);
8453 break;
8454 }
8455 }
8456 }
8457 }
8458
Daniel Veillarde57ec792003-09-10 10:50:59 +00008459 nsname = xmlGetNamespace(ctxt, prefix);
8460 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008461 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8462 "Namespace prefix %s on %s is not defined\n",
8463 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008464 }
8465 *pref = prefix;
8466 *URI = nsname;
8467
8468 /*
8469 * SAX: Start of Element !
8470 */
8471 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8472 (!ctxt->disableSAX)) {
8473 if (nbNs > 0)
8474 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8475 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8476 nbatts / 5, nbdef, atts);
8477 else
8478 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8479 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8480 }
8481
8482 /*
8483 * Free up attribute allocated strings if needed
8484 */
8485 if (attval != 0) {
8486 for (i = 3,j = 0; j < nratts;i += 5,j++)
8487 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8488 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008489 }
8490
8491 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008492
8493base_changed:
8494 /*
8495 * the attribute strings are valid iif the base didn't changed
8496 */
8497 if (attval != 0) {
8498 for (i = 3,j = 0; j < nratts;i += 5,j++)
8499 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8500 xmlFree((xmlChar *) atts[i]);
8501 }
8502 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008503 ctxt->input->line = oldline;
8504 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008505 if (ctxt->wellFormed == 1) {
8506 goto reparse;
8507 }
8508 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008509}
8510
8511/**
8512 * xmlParseEndTag2:
8513 * @ctxt: an XML parser context
8514 * @line: line of the start tag
8515 * @nsNr: number of namespaces on the start tag
8516 *
8517 * parse an end of tag
8518 *
8519 * [42] ETag ::= '</' Name S? '>'
8520 *
8521 * With namespace
8522 *
8523 * [NS 9] ETag ::= '</' QName S? '>'
8524 */
8525
8526static void
8527xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008528 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529 const xmlChar *name;
8530
8531 GROW;
8532 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008533 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008534 return;
8535 }
8536 SKIP(2);
8537
William M. Brack13dfa872004-09-18 04:52:08 +00008538 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008539 if (ctxt->input->cur[tlen] == '>') {
8540 ctxt->input->cur += tlen + 1;
8541 goto done;
8542 }
8543 ctxt->input->cur += tlen;
8544 name = (xmlChar*)1;
8545 } else {
8546 if (prefix == NULL)
8547 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8548 else
8549 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8550 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008551
8552 /*
8553 * We should definitely be at the ending "S? '>'" part
8554 */
8555 GROW;
8556 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008557 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008558 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008559 } else
8560 NEXT1;
8561
8562 /*
8563 * [ WFC: Element Type Match ]
8564 * The Name in an element's end-tag must match the element type in the
8565 * start-tag.
8566 *
8567 */
8568 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008569 if (name == NULL) name = BAD_CAST "unparseable";
8570 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008571 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008572 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573 }
8574
8575 /*
8576 * SAX: End of Tag
8577 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008578done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8580 (!ctxt->disableSAX))
8581 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8582
Daniel Veillard0fb18932003-09-07 09:14:37 +00008583 spacePop(ctxt);
8584 if (nsNr != 0)
8585 nsPop(ctxt, nsNr);
8586 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008587}
8588
8589/**
Owen Taylor3473f882001-02-23 17:55:21 +00008590 * xmlParseCDSect:
8591 * @ctxt: an XML parser context
8592 *
8593 * Parse escaped pure raw content.
8594 *
8595 * [18] CDSect ::= CDStart CData CDEnd
8596 *
8597 * [19] CDStart ::= '<![CDATA['
8598 *
8599 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8600 *
8601 * [21] CDEnd ::= ']]>'
8602 */
8603void
8604xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8605 xmlChar *buf = NULL;
8606 int len = 0;
8607 int size = XML_PARSER_BUFFER_SIZE;
8608 int r, rl;
8609 int s, sl;
8610 int cur, l;
8611 int count = 0;
8612
Daniel Veillard8f597c32003-10-06 08:19:27 +00008613 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008614 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008615 SKIP(9);
8616 } else
8617 return;
8618
8619 ctxt->instate = XML_PARSER_CDATA_SECTION;
8620 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008621 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008622 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008623 ctxt->instate = XML_PARSER_CONTENT;
8624 return;
8625 }
8626 NEXTL(rl);
8627 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008628 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008629 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008630 ctxt->instate = XML_PARSER_CONTENT;
8631 return;
8632 }
8633 NEXTL(sl);
8634 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008635 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008636 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008637 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008638 return;
8639 }
William M. Brack871611b2003-10-18 04:53:14 +00008640 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008641 ((r != ']') || (s != ']') || (cur != '>'))) {
8642 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008643 xmlChar *tmp;
8644
Owen Taylor3473f882001-02-23 17:55:21 +00008645 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008646 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8647 if (tmp == NULL) {
8648 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008649 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008650 return;
8651 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008652 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008653 }
8654 COPY_BUF(rl,buf,len,r);
8655 r = s;
8656 rl = sl;
8657 s = cur;
8658 sl = l;
8659 count++;
8660 if (count > 50) {
8661 GROW;
8662 count = 0;
8663 }
8664 NEXTL(l);
8665 cur = CUR_CHAR(l);
8666 }
8667 buf[len] = 0;
8668 ctxt->instate = XML_PARSER_CONTENT;
8669 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008670 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008671 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008672 xmlFree(buf);
8673 return;
8674 }
8675 NEXTL(l);
8676
8677 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008678 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008679 */
8680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8681 if (ctxt->sax->cdataBlock != NULL)
8682 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008683 else if (ctxt->sax->characters != NULL)
8684 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008685 }
8686 xmlFree(buf);
8687}
8688
8689/**
8690 * xmlParseContent:
8691 * @ctxt: an XML parser context
8692 *
8693 * Parse a content:
8694 *
8695 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8696 */
8697
8698void
8699xmlParseContent(xmlParserCtxtPtr ctxt) {
8700 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008701 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008702 ((RAW != '<') || (NXT(1) != '/')) &&
8703 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008704 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008705 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008706 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008707
8708 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008709 * First case : a Processing Instruction.
8710 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008711 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008712 xmlParsePI(ctxt);
8713 }
8714
8715 /*
8716 * Second case : a CDSection
8717 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008718 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008719 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008720 xmlParseCDSect(ctxt);
8721 }
8722
8723 /*
8724 * Third case : a comment
8725 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008726 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008727 (NXT(2) == '-') && (NXT(3) == '-')) {
8728 xmlParseComment(ctxt);
8729 ctxt->instate = XML_PARSER_CONTENT;
8730 }
8731
8732 /*
8733 * Fourth case : a sub-element.
8734 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008735 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008736 xmlParseElement(ctxt);
8737 }
8738
8739 /*
8740 * Fifth case : a reference. If if has not been resolved,
8741 * parsing returns it's Name, create the node
8742 */
8743
Daniel Veillard21a0f912001-02-25 19:54:14 +00008744 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008745 xmlParseReference(ctxt);
8746 }
8747
8748 /*
8749 * Last case, text. Note that References are handled directly.
8750 */
8751 else {
8752 xmlParseCharData(ctxt, 0);
8753 }
8754
8755 GROW;
8756 /*
8757 * Pop-up of finished entities.
8758 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008759 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008760 xmlPopInput(ctxt);
8761 SHRINK;
8762
Daniel Veillardfdc91562002-07-01 21:52:03 +00008763 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008764 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8765 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008766 ctxt->instate = XML_PARSER_EOF;
8767 break;
8768 }
8769 }
8770}
8771
8772/**
8773 * xmlParseElement:
8774 * @ctxt: an XML parser context
8775 *
8776 * parse an XML element, this is highly recursive
8777 *
8778 * [39] element ::= EmptyElemTag | STag content ETag
8779 *
8780 * [ WFC: Element Type Match ]
8781 * The Name in an element's end-tag must match the element type in the
8782 * start-tag.
8783 *
Owen Taylor3473f882001-02-23 17:55:21 +00008784 */
8785
8786void
8787xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008788 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008789 const xmlChar *prefix;
8790 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008791 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008792 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008793 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008794 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008795
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008796 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8797 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8798 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8799 xmlParserMaxDepth);
8800 ctxt->instate = XML_PARSER_EOF;
8801 return;
8802 }
8803
Owen Taylor3473f882001-02-23 17:55:21 +00008804 /* Capture start position */
8805 if (ctxt->record_info) {
8806 node_info.begin_pos = ctxt->input->consumed +
8807 (CUR_PTR - ctxt->input->base);
8808 node_info.begin_line = ctxt->input->line;
8809 }
8810
8811 if (ctxt->spaceNr == 0)
8812 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008813 else if (*ctxt->space == -2)
8814 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008815 else
8816 spacePush(ctxt, *ctxt->space);
8817
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008818 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008819#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008820 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008821#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008822 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008823#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008824 else
8825 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008826#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008827 if (name == NULL) {
8828 spacePop(ctxt);
8829 return;
8830 }
8831 namePush(ctxt, name);
8832 ret = ctxt->node;
8833
Daniel Veillard4432df22003-09-28 18:58:27 +00008834#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008835 /*
8836 * [ VC: Root Element Type ]
8837 * The Name in the document type declaration must match the element
8838 * type of the root element.
8839 */
8840 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8841 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8842 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008843#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008844
8845 /*
8846 * Check for an Empty Element.
8847 */
8848 if ((RAW == '/') && (NXT(1) == '>')) {
8849 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008850 if (ctxt->sax2) {
8851 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8852 (!ctxt->disableSAX))
8853 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008854#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855 } else {
8856 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8857 (!ctxt->disableSAX))
8858 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008859#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008860 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008861 namePop(ctxt);
8862 spacePop(ctxt);
8863 if (nsNr != ctxt->nsNr)
8864 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008865 if ( ret != NULL && ctxt->record_info ) {
8866 node_info.end_pos = ctxt->input->consumed +
8867 (CUR_PTR - ctxt->input->base);
8868 node_info.end_line = ctxt->input->line;
8869 node_info.node = ret;
8870 xmlParserAddNodeInfo(ctxt, &node_info);
8871 }
8872 return;
8873 }
8874 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008875 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008876 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008877 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8878 "Couldn't find end of Start Tag %s line %d\n",
8879 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008880
8881 /*
8882 * end of parsing of this node.
8883 */
8884 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008885 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008886 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 if (nsNr != ctxt->nsNr)
8888 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008889
8890 /*
8891 * Capture end position and add node
8892 */
8893 if ( ret != NULL && ctxt->record_info ) {
8894 node_info.end_pos = ctxt->input->consumed +
8895 (CUR_PTR - ctxt->input->base);
8896 node_info.end_line = ctxt->input->line;
8897 node_info.node = ret;
8898 xmlParserAddNodeInfo(ctxt, &node_info);
8899 }
8900 return;
8901 }
8902
8903 /*
8904 * Parse the content of the element:
8905 */
8906 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008907 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008908 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008909 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008910 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008911
8912 /*
8913 * end of parsing of this node.
8914 */
8915 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008916 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008917 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 if (nsNr != ctxt->nsNr)
8919 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008920 return;
8921 }
8922
8923 /*
8924 * parse the end of tag: '</' should be here.
8925 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008926 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008927 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008928 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008929 }
8930#ifdef LIBXML_SAX1_ENABLED
8931 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008932 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008933#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008934
8935 /*
8936 * Capture end position and add node
8937 */
8938 if ( ret != NULL && ctxt->record_info ) {
8939 node_info.end_pos = ctxt->input->consumed +
8940 (CUR_PTR - ctxt->input->base);
8941 node_info.end_line = ctxt->input->line;
8942 node_info.node = ret;
8943 xmlParserAddNodeInfo(ctxt, &node_info);
8944 }
8945}
8946
8947/**
8948 * xmlParseVersionNum:
8949 * @ctxt: an XML parser context
8950 *
8951 * parse the XML version value.
8952 *
8953 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8954 *
8955 * Returns the string giving the XML version number, or NULL
8956 */
8957xmlChar *
8958xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8959 xmlChar *buf = NULL;
8960 int len = 0;
8961 int size = 10;
8962 xmlChar cur;
8963
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008964 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008965 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 return(NULL);
8968 }
8969 cur = CUR;
8970 while (((cur >= 'a') && (cur <= 'z')) ||
8971 ((cur >= 'A') && (cur <= 'Z')) ||
8972 ((cur >= '0') && (cur <= '9')) ||
8973 (cur == '_') || (cur == '.') ||
8974 (cur == ':') || (cur == '-')) {
8975 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008976 xmlChar *tmp;
8977
Owen Taylor3473f882001-02-23 17:55:21 +00008978 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008979 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8980 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008981 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008982 return(NULL);
8983 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008984 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008985 }
8986 buf[len++] = cur;
8987 NEXT;
8988 cur=CUR;
8989 }
8990 buf[len] = 0;
8991 return(buf);
8992}
8993
8994/**
8995 * xmlParseVersionInfo:
8996 * @ctxt: an XML parser context
8997 *
8998 * parse the XML version.
8999 *
9000 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9001 *
9002 * [25] Eq ::= S? '=' S?
9003 *
9004 * Returns the version string, e.g. "1.0"
9005 */
9006
9007xmlChar *
9008xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9009 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009010
Daniel Veillarda07050d2003-10-19 14:46:32 +00009011 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009012 SKIP(7);
9013 SKIP_BLANKS;
9014 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009015 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009016 return(NULL);
9017 }
9018 NEXT;
9019 SKIP_BLANKS;
9020 if (RAW == '"') {
9021 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009022 version = xmlParseVersionNum(ctxt);
9023 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009024 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009025 } else
9026 NEXT;
9027 } else if (RAW == '\''){
9028 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009029 version = xmlParseVersionNum(ctxt);
9030 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009031 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009032 } else
9033 NEXT;
9034 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009035 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009036 }
9037 }
9038 return(version);
9039}
9040
9041/**
9042 * xmlParseEncName:
9043 * @ctxt: an XML parser context
9044 *
9045 * parse the XML encoding name
9046 *
9047 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9048 *
9049 * Returns the encoding name value or NULL
9050 */
9051xmlChar *
9052xmlParseEncName(xmlParserCtxtPtr ctxt) {
9053 xmlChar *buf = NULL;
9054 int len = 0;
9055 int size = 10;
9056 xmlChar cur;
9057
9058 cur = CUR;
9059 if (((cur >= 'a') && (cur <= 'z')) ||
9060 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009061 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009062 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009063 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009064 return(NULL);
9065 }
9066
9067 buf[len++] = cur;
9068 NEXT;
9069 cur = CUR;
9070 while (((cur >= 'a') && (cur <= 'z')) ||
9071 ((cur >= 'A') && (cur <= 'Z')) ||
9072 ((cur >= '0') && (cur <= '9')) ||
9073 (cur == '.') || (cur == '_') ||
9074 (cur == '-')) {
9075 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009076 xmlChar *tmp;
9077
Owen Taylor3473f882001-02-23 17:55:21 +00009078 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009079 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9080 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009081 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009082 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009083 return(NULL);
9084 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009085 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009086 }
9087 buf[len++] = cur;
9088 NEXT;
9089 cur = CUR;
9090 if (cur == 0) {
9091 SHRINK;
9092 GROW;
9093 cur = CUR;
9094 }
9095 }
9096 buf[len] = 0;
9097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009098 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009099 }
9100 return(buf);
9101}
9102
9103/**
9104 * xmlParseEncodingDecl:
9105 * @ctxt: an XML parser context
9106 *
9107 * parse the XML encoding declaration
9108 *
9109 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9110 *
9111 * this setups the conversion filters.
9112 *
9113 * Returns the encoding value or NULL
9114 */
9115
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009116const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009117xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9118 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009119
9120 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009121 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009122 SKIP(8);
9123 SKIP_BLANKS;
9124 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009125 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009126 return(NULL);
9127 }
9128 NEXT;
9129 SKIP_BLANKS;
9130 if (RAW == '"') {
9131 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009132 encoding = xmlParseEncName(ctxt);
9133 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009134 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009135 } else
9136 NEXT;
9137 } else if (RAW == '\''){
9138 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009139 encoding = xmlParseEncName(ctxt);
9140 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009141 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009142 } else
9143 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009144 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009145 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009146 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009147 /*
9148 * UTF-16 encoding stwich has already taken place at this stage,
9149 * more over the little-endian/big-endian selection is already done
9150 */
9151 if ((encoding != NULL) &&
9152 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9153 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009154 if (ctxt->encoding != NULL)
9155 xmlFree((xmlChar *) ctxt->encoding);
9156 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009157 }
9158 /*
9159 * UTF-8 encoding is handled natively
9160 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009161 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009162 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9163 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009164 if (ctxt->encoding != NULL)
9165 xmlFree((xmlChar *) ctxt->encoding);
9166 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009167 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009168 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009169 xmlCharEncodingHandlerPtr handler;
9170
9171 if (ctxt->input->encoding != NULL)
9172 xmlFree((xmlChar *) ctxt->input->encoding);
9173 ctxt->input->encoding = encoding;
9174
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009175 handler = xmlFindCharEncodingHandler((const char *) encoding);
9176 if (handler != NULL) {
9177 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009178 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009179 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009180 "Unsupported encoding %s\n", encoding);
9181 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009182 }
9183 }
9184 }
9185 return(encoding);
9186}
9187
9188/**
9189 * xmlParseSDDecl:
9190 * @ctxt: an XML parser context
9191 *
9192 * parse the XML standalone declaration
9193 *
9194 * [32] SDDecl ::= S 'standalone' Eq
9195 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9196 *
9197 * [ VC: Standalone Document Declaration ]
9198 * TODO The standalone document declaration must have the value "no"
9199 * if any external markup declarations contain declarations of:
9200 * - attributes with default values, if elements to which these
9201 * attributes apply appear in the document without specifications
9202 * of values for these attributes, or
9203 * - entities (other than amp, lt, gt, apos, quot), if references
9204 * to those entities appear in the document, or
9205 * - attributes with values subject to normalization, where the
9206 * attribute appears in the document with a value which will change
9207 * as a result of normalization, or
9208 * - element types with element content, if white space occurs directly
9209 * within any instance of those types.
9210 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009211 * Returns:
9212 * 1 if standalone="yes"
9213 * 0 if standalone="no"
9214 * -2 if standalone attribute is missing or invalid
9215 * (A standalone value of -2 means that the XML declaration was found,
9216 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009217 */
9218
9219int
9220xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009221 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009222
9223 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009224 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009225 SKIP(10);
9226 SKIP_BLANKS;
9227 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009228 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009229 return(standalone);
9230 }
9231 NEXT;
9232 SKIP_BLANKS;
9233 if (RAW == '\''){
9234 NEXT;
9235 if ((RAW == 'n') && (NXT(1) == 'o')) {
9236 standalone = 0;
9237 SKIP(2);
9238 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9239 (NXT(2) == 's')) {
9240 standalone = 1;
9241 SKIP(3);
9242 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009243 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009244 }
9245 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009246 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009247 } else
9248 NEXT;
9249 } else if (RAW == '"'){
9250 NEXT;
9251 if ((RAW == 'n') && (NXT(1) == 'o')) {
9252 standalone = 0;
9253 SKIP(2);
9254 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9255 (NXT(2) == 's')) {
9256 standalone = 1;
9257 SKIP(3);
9258 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009259 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009260 }
9261 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009262 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009263 } else
9264 NEXT;
9265 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009266 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009267 }
9268 }
9269 return(standalone);
9270}
9271
9272/**
9273 * xmlParseXMLDecl:
9274 * @ctxt: an XML parser context
9275 *
9276 * parse an XML declaration header
9277 *
9278 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9279 */
9280
9281void
9282xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9283 xmlChar *version;
9284
9285 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009286 * This value for standalone indicates that the document has an
9287 * XML declaration but it does not have a standalone attribute.
9288 * It will be overwritten later if a standalone attribute is found.
9289 */
9290 ctxt->input->standalone = -2;
9291
9292 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009293 * We know that '<?xml' is here.
9294 */
9295 SKIP(5);
9296
William M. Brack76e95df2003-10-18 16:20:14 +00009297 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009298 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9299 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009300 }
9301 SKIP_BLANKS;
9302
9303 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009304 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009305 */
9306 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009307 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009308 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009309 } else {
9310 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9311 /*
9312 * TODO: Blueberry should be detected here
9313 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009314 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9315 "Unsupported version '%s'\n",
9316 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009317 }
9318 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009319 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009320 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009321 }
Owen Taylor3473f882001-02-23 17:55:21 +00009322
9323 /*
9324 * We may have the encoding declaration
9325 */
William M. Brack76e95df2003-10-18 16:20:14 +00009326 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009327 if ((RAW == '?') && (NXT(1) == '>')) {
9328 SKIP(2);
9329 return;
9330 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009332 }
9333 xmlParseEncodingDecl(ctxt);
9334 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9335 /*
9336 * The XML REC instructs us to stop parsing right here
9337 */
9338 return;
9339 }
9340
9341 /*
9342 * We may have the standalone status.
9343 */
William M. Brack76e95df2003-10-18 16:20:14 +00009344 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009345 if ((RAW == '?') && (NXT(1) == '>')) {
9346 SKIP(2);
9347 return;
9348 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009349 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009350 }
9351 SKIP_BLANKS;
9352 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9353
9354 SKIP_BLANKS;
9355 if ((RAW == '?') && (NXT(1) == '>')) {
9356 SKIP(2);
9357 } else if (RAW == '>') {
9358 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009359 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009360 NEXT;
9361 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009362 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009363 MOVETO_ENDTAG(CUR_PTR);
9364 NEXT;
9365 }
9366}
9367
9368/**
9369 * xmlParseMisc:
9370 * @ctxt: an XML parser context
9371 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009372 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009373 *
9374 * [27] Misc ::= Comment | PI | S
9375 */
9376
9377void
9378xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009379 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009380 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009381 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009382 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009383 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009384 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009385 NEXT;
9386 } else
9387 xmlParseComment(ctxt);
9388 }
9389}
9390
9391/**
9392 * xmlParseDocument:
9393 * @ctxt: an XML parser context
9394 *
9395 * parse an XML document (and build a tree if using the standard SAX
9396 * interface).
9397 *
9398 * [1] document ::= prolog element Misc*
9399 *
9400 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9401 *
9402 * Returns 0, -1 in case of error. the parser context is augmented
9403 * as a result of the parsing.
9404 */
9405
9406int
9407xmlParseDocument(xmlParserCtxtPtr ctxt) {
9408 xmlChar start[4];
9409 xmlCharEncoding enc;
9410
9411 xmlInitParser();
9412
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009413 if ((ctxt == NULL) || (ctxt->input == NULL))
9414 return(-1);
9415
Owen Taylor3473f882001-02-23 17:55:21 +00009416 GROW;
9417
9418 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 * SAX: detecting the level.
9420 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009421 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009422
9423 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009424 * SAX: beginning of the document processing.
9425 */
9426 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9427 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9428
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009429 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9430 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009431 /*
9432 * Get the 4 first bytes and decode the charset
9433 * if enc != XML_CHAR_ENCODING_NONE
9434 * plug some encoding conversion routines.
9435 */
9436 start[0] = RAW;
9437 start[1] = NXT(1);
9438 start[2] = NXT(2);
9439 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009440 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009441 if (enc != XML_CHAR_ENCODING_NONE) {
9442 xmlSwitchEncoding(ctxt, enc);
9443 }
Owen Taylor3473f882001-02-23 17:55:21 +00009444 }
9445
9446
9447 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009448 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009449 }
9450
9451 /*
9452 * Check for the XMLDecl in the Prolog.
9453 */
9454 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009455 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009456
9457 /*
9458 * Note that we will switch encoding on the fly.
9459 */
9460 xmlParseXMLDecl(ctxt);
9461 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9462 /*
9463 * The XML REC instructs us to stop parsing right here
9464 */
9465 return(-1);
9466 }
9467 ctxt->standalone = ctxt->input->standalone;
9468 SKIP_BLANKS;
9469 } else {
9470 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9471 }
9472 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9473 ctxt->sax->startDocument(ctxt->userData);
9474
9475 /*
9476 * The Misc part of the Prolog
9477 */
9478 GROW;
9479 xmlParseMisc(ctxt);
9480
9481 /*
9482 * Then possibly doc type declaration(s) and more Misc
9483 * (doctypedecl Misc*)?
9484 */
9485 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009486 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009487
9488 ctxt->inSubset = 1;
9489 xmlParseDocTypeDecl(ctxt);
9490 if (RAW == '[') {
9491 ctxt->instate = XML_PARSER_DTD;
9492 xmlParseInternalSubset(ctxt);
9493 }
9494
9495 /*
9496 * Create and update the external subset.
9497 */
9498 ctxt->inSubset = 2;
9499 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9500 (!ctxt->disableSAX))
9501 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9502 ctxt->extSubSystem, ctxt->extSubURI);
9503 ctxt->inSubset = 0;
9504
Daniel Veillardac4118d2008-01-11 05:27:32 +00009505 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009506
9507 ctxt->instate = XML_PARSER_PROLOG;
9508 xmlParseMisc(ctxt);
9509 }
9510
9511 /*
9512 * Time to start parsing the tree itself
9513 */
9514 GROW;
9515 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009516 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9517 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009518 } else {
9519 ctxt->instate = XML_PARSER_CONTENT;
9520 xmlParseElement(ctxt);
9521 ctxt->instate = XML_PARSER_EPILOG;
9522
9523
9524 /*
9525 * The Misc part at the end
9526 */
9527 xmlParseMisc(ctxt);
9528
Daniel Veillard561b7f82002-03-20 21:55:57 +00009529 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009530 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009531 }
9532 ctxt->instate = XML_PARSER_EOF;
9533 }
9534
9535 /*
9536 * SAX: end of the document processing.
9537 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009538 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009539 ctxt->sax->endDocument(ctxt->userData);
9540
Daniel Veillard5997aca2002-03-18 18:36:20 +00009541 /*
9542 * Remove locally kept entity definitions if the tree was not built
9543 */
9544 if ((ctxt->myDoc != NULL) &&
9545 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9546 xmlFreeDoc(ctxt->myDoc);
9547 ctxt->myDoc = NULL;
9548 }
9549
Daniel Veillardc7612992002-02-17 22:47:37 +00009550 if (! ctxt->wellFormed) {
9551 ctxt->valid = 0;
9552 return(-1);
9553 }
Owen Taylor3473f882001-02-23 17:55:21 +00009554 return(0);
9555}
9556
9557/**
9558 * xmlParseExtParsedEnt:
9559 * @ctxt: an XML parser context
9560 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009561 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009562 * An external general parsed entity is well-formed if it matches the
9563 * production labeled extParsedEnt.
9564 *
9565 * [78] extParsedEnt ::= TextDecl? content
9566 *
9567 * Returns 0, -1 in case of error. the parser context is augmented
9568 * as a result of the parsing.
9569 */
9570
9571int
9572xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9573 xmlChar start[4];
9574 xmlCharEncoding enc;
9575
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009576 if ((ctxt == NULL) || (ctxt->input == NULL))
9577 return(-1);
9578
Owen Taylor3473f882001-02-23 17:55:21 +00009579 xmlDefaultSAXHandlerInit();
9580
Daniel Veillard309f81d2003-09-23 09:02:53 +00009581 xmlDetectSAX2(ctxt);
9582
Owen Taylor3473f882001-02-23 17:55:21 +00009583 GROW;
9584
9585 /*
9586 * SAX: beginning of the document processing.
9587 */
9588 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9589 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9590
9591 /*
9592 * Get the 4 first bytes and decode the charset
9593 * if enc != XML_CHAR_ENCODING_NONE
9594 * plug some encoding conversion routines.
9595 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009596 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9597 start[0] = RAW;
9598 start[1] = NXT(1);
9599 start[2] = NXT(2);
9600 start[3] = NXT(3);
9601 enc = xmlDetectCharEncoding(start, 4);
9602 if (enc != XML_CHAR_ENCODING_NONE) {
9603 xmlSwitchEncoding(ctxt, enc);
9604 }
Owen Taylor3473f882001-02-23 17:55:21 +00009605 }
9606
9607
9608 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009609 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009610 }
9611
9612 /*
9613 * Check for the XMLDecl in the Prolog.
9614 */
9615 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009616 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009617
9618 /*
9619 * Note that we will switch encoding on the fly.
9620 */
9621 xmlParseXMLDecl(ctxt);
9622 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9623 /*
9624 * The XML REC instructs us to stop parsing right here
9625 */
9626 return(-1);
9627 }
9628 SKIP_BLANKS;
9629 } else {
9630 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9631 }
9632 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9633 ctxt->sax->startDocument(ctxt->userData);
9634
9635 /*
9636 * Doing validity checking on chunk doesn't make sense
9637 */
9638 ctxt->instate = XML_PARSER_CONTENT;
9639 ctxt->validate = 0;
9640 ctxt->loadsubset = 0;
9641 ctxt->depth = 0;
9642
9643 xmlParseContent(ctxt);
9644
9645 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009646 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009647 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009648 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009649 }
9650
9651 /*
9652 * SAX: end of the document processing.
9653 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009654 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009655 ctxt->sax->endDocument(ctxt->userData);
9656
9657 if (! ctxt->wellFormed) return(-1);
9658 return(0);
9659}
9660
Daniel Veillard73b013f2003-09-30 12:36:01 +00009661#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009662/************************************************************************
9663 * *
9664 * Progressive parsing interfaces *
9665 * *
9666 ************************************************************************/
9667
9668/**
9669 * xmlParseLookupSequence:
9670 * @ctxt: an XML parser context
9671 * @first: the first char to lookup
9672 * @next: the next char to lookup or zero
9673 * @third: the next char to lookup or zero
9674 *
9675 * Try to find if a sequence (first, next, third) or just (first next) or
9676 * (first) is available in the input stream.
9677 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9678 * to avoid rescanning sequences of bytes, it DOES change the state of the
9679 * parser, do not use liberally.
9680 *
9681 * Returns the index to the current parsing point if the full sequence
9682 * is available, -1 otherwise.
9683 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009684static int
Owen Taylor3473f882001-02-23 17:55:21 +00009685xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9686 xmlChar next, xmlChar third) {
9687 int base, len;
9688 xmlParserInputPtr in;
9689 const xmlChar *buf;
9690
9691 in = ctxt->input;
9692 if (in == NULL) return(-1);
9693 base = in->cur - in->base;
9694 if (base < 0) return(-1);
9695 if (ctxt->checkIndex > base)
9696 base = ctxt->checkIndex;
9697 if (in->buf == NULL) {
9698 buf = in->base;
9699 len = in->length;
9700 } else {
9701 buf = in->buf->buffer->content;
9702 len = in->buf->buffer->use;
9703 }
9704 /* take into account the sequence length */
9705 if (third) len -= 2;
9706 else if (next) len --;
9707 for (;base < len;base++) {
9708 if (buf[base] == first) {
9709 if (third != 0) {
9710 if ((buf[base + 1] != next) ||
9711 (buf[base + 2] != third)) continue;
9712 } else if (next != 0) {
9713 if (buf[base + 1] != next) continue;
9714 }
9715 ctxt->checkIndex = 0;
9716#ifdef DEBUG_PUSH
9717 if (next == 0)
9718 xmlGenericError(xmlGenericErrorContext,
9719 "PP: lookup '%c' found at %d\n",
9720 first, base);
9721 else if (third == 0)
9722 xmlGenericError(xmlGenericErrorContext,
9723 "PP: lookup '%c%c' found at %d\n",
9724 first, next, base);
9725 else
9726 xmlGenericError(xmlGenericErrorContext,
9727 "PP: lookup '%c%c%c' found at %d\n",
9728 first, next, third, base);
9729#endif
9730 return(base - (in->cur - in->base));
9731 }
9732 }
9733 ctxt->checkIndex = base;
9734#ifdef DEBUG_PUSH
9735 if (next == 0)
9736 xmlGenericError(xmlGenericErrorContext,
9737 "PP: lookup '%c' failed\n", first);
9738 else if (third == 0)
9739 xmlGenericError(xmlGenericErrorContext,
9740 "PP: lookup '%c%c' failed\n", first, next);
9741 else
9742 xmlGenericError(xmlGenericErrorContext,
9743 "PP: lookup '%c%c%c' failed\n", first, next, third);
9744#endif
9745 return(-1);
9746}
9747
9748/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009749 * xmlParseGetLasts:
9750 * @ctxt: an XML parser context
9751 * @lastlt: pointer to store the last '<' from the input
9752 * @lastgt: pointer to store the last '>' from the input
9753 *
9754 * Lookup the last < and > in the current chunk
9755 */
9756static void
9757xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9758 const xmlChar **lastgt) {
9759 const xmlChar *tmp;
9760
9761 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9762 xmlGenericError(xmlGenericErrorContext,
9763 "Internal error: xmlParseGetLasts\n");
9764 return;
9765 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009766 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009767 tmp = ctxt->input->end;
9768 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009769 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009770 if (tmp < ctxt->input->base) {
9771 *lastlt = NULL;
9772 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009773 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009774 *lastlt = tmp;
9775 tmp++;
9776 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9777 if (*tmp == '\'') {
9778 tmp++;
9779 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9780 if (tmp < ctxt->input->end) tmp++;
9781 } else if (*tmp == '"') {
9782 tmp++;
9783 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9784 if (tmp < ctxt->input->end) tmp++;
9785 } else
9786 tmp++;
9787 }
9788 if (tmp < ctxt->input->end)
9789 *lastgt = tmp;
9790 else {
9791 tmp = *lastlt;
9792 tmp--;
9793 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9794 if (tmp >= ctxt->input->base)
9795 *lastgt = tmp;
9796 else
9797 *lastgt = NULL;
9798 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009799 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009800 } else {
9801 *lastlt = NULL;
9802 *lastgt = NULL;
9803 }
9804}
9805/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009806 * xmlCheckCdataPush:
9807 * @cur: pointer to the bock of characters
9808 * @len: length of the block in bytes
9809 *
9810 * Check that the block of characters is okay as SCdata content [20]
9811 *
9812 * Returns the number of bytes to pass if okay, a negative index where an
9813 * UTF-8 error occured otherwise
9814 */
9815static int
9816xmlCheckCdataPush(const xmlChar *utf, int len) {
9817 int ix;
9818 unsigned char c;
9819 int codepoint;
9820
9821 if ((utf == NULL) || (len <= 0))
9822 return(0);
9823
9824 for (ix = 0; ix < len;) { /* string is 0-terminated */
9825 c = utf[ix];
9826 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9827 if (c >= 0x20)
9828 ix++;
9829 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9830 ix++;
9831 else
9832 return(-ix);
9833 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9834 if (ix + 2 > len) return(ix);
9835 if ((utf[ix+1] & 0xc0 ) != 0x80)
9836 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009837 codepoint = (utf[ix] & 0x1f) << 6;
9838 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009839 if (!xmlIsCharQ(codepoint))
9840 return(-ix);
9841 ix += 2;
9842 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9843 if (ix + 3 > len) return(ix);
9844 if (((utf[ix+1] & 0xc0) != 0x80) ||
9845 ((utf[ix+2] & 0xc0) != 0x80))
9846 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009847 codepoint = (utf[ix] & 0xf) << 12;
9848 codepoint |= (utf[ix+1] & 0x3f) << 6;
9849 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009850 if (!xmlIsCharQ(codepoint))
9851 return(-ix);
9852 ix += 3;
9853 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9854 if (ix + 4 > len) return(ix);
9855 if (((utf[ix+1] & 0xc0) != 0x80) ||
9856 ((utf[ix+2] & 0xc0) != 0x80) ||
9857 ((utf[ix+3] & 0xc0) != 0x80))
9858 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009859 codepoint = (utf[ix] & 0x7) << 18;
9860 codepoint |= (utf[ix+1] & 0x3f) << 12;
9861 codepoint |= (utf[ix+2] & 0x3f) << 6;
9862 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009863 if (!xmlIsCharQ(codepoint))
9864 return(-ix);
9865 ix += 4;
9866 } else /* unknown encoding */
9867 return(-ix);
9868 }
9869 return(ix);
9870}
9871
9872/**
Owen Taylor3473f882001-02-23 17:55:21 +00009873 * xmlParseTryOrFinish:
9874 * @ctxt: an XML parser context
9875 * @terminate: last chunk indicator
9876 *
9877 * Try to progress on parsing
9878 *
9879 * Returns zero if no parsing was possible
9880 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009881static int
Owen Taylor3473f882001-02-23 17:55:21 +00009882xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9883 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009884 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009885 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009886 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009887
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009888 if (ctxt->input == NULL)
9889 return(0);
9890
Owen Taylor3473f882001-02-23 17:55:21 +00009891#ifdef DEBUG_PUSH
9892 switch (ctxt->instate) {
9893 case XML_PARSER_EOF:
9894 xmlGenericError(xmlGenericErrorContext,
9895 "PP: try EOF\n"); break;
9896 case XML_PARSER_START:
9897 xmlGenericError(xmlGenericErrorContext,
9898 "PP: try START\n"); break;
9899 case XML_PARSER_MISC:
9900 xmlGenericError(xmlGenericErrorContext,
9901 "PP: try MISC\n");break;
9902 case XML_PARSER_COMMENT:
9903 xmlGenericError(xmlGenericErrorContext,
9904 "PP: try COMMENT\n");break;
9905 case XML_PARSER_PROLOG:
9906 xmlGenericError(xmlGenericErrorContext,
9907 "PP: try PROLOG\n");break;
9908 case XML_PARSER_START_TAG:
9909 xmlGenericError(xmlGenericErrorContext,
9910 "PP: try START_TAG\n");break;
9911 case XML_PARSER_CONTENT:
9912 xmlGenericError(xmlGenericErrorContext,
9913 "PP: try CONTENT\n");break;
9914 case XML_PARSER_CDATA_SECTION:
9915 xmlGenericError(xmlGenericErrorContext,
9916 "PP: try CDATA_SECTION\n");break;
9917 case XML_PARSER_END_TAG:
9918 xmlGenericError(xmlGenericErrorContext,
9919 "PP: try END_TAG\n");break;
9920 case XML_PARSER_ENTITY_DECL:
9921 xmlGenericError(xmlGenericErrorContext,
9922 "PP: try ENTITY_DECL\n");break;
9923 case XML_PARSER_ENTITY_VALUE:
9924 xmlGenericError(xmlGenericErrorContext,
9925 "PP: try ENTITY_VALUE\n");break;
9926 case XML_PARSER_ATTRIBUTE_VALUE:
9927 xmlGenericError(xmlGenericErrorContext,
9928 "PP: try ATTRIBUTE_VALUE\n");break;
9929 case XML_PARSER_DTD:
9930 xmlGenericError(xmlGenericErrorContext,
9931 "PP: try DTD\n");break;
9932 case XML_PARSER_EPILOG:
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: try EPILOG\n");break;
9935 case XML_PARSER_PI:
9936 xmlGenericError(xmlGenericErrorContext,
9937 "PP: try PI\n");break;
9938 case XML_PARSER_IGNORE:
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: try IGNORE\n");break;
9941 }
9942#endif
9943
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009944 if ((ctxt->input != NULL) &&
9945 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009946 xmlSHRINK(ctxt);
9947 ctxt->checkIndex = 0;
9948 }
9949 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009950
Daniel Veillarda880b122003-04-21 21:36:41 +00009951 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009952 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009953 return(0);
9954
9955
Owen Taylor3473f882001-02-23 17:55:21 +00009956 /*
9957 * Pop-up of finished entities.
9958 */
9959 while ((RAW == 0) && (ctxt->inputNr > 1))
9960 xmlPopInput(ctxt);
9961
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009962 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009963 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009964 avail = ctxt->input->length -
9965 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009966 else {
9967 /*
9968 * If we are operating on converted input, try to flush
9969 * remainng chars to avoid them stalling in the non-converted
9970 * buffer.
9971 */
9972 if ((ctxt->input->buf->raw != NULL) &&
9973 (ctxt->input->buf->raw->use > 0)) {
9974 int base = ctxt->input->base -
9975 ctxt->input->buf->buffer->content;
9976 int current = ctxt->input->cur - ctxt->input->base;
9977
9978 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9979 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9980 ctxt->input->cur = ctxt->input->base + current;
9981 ctxt->input->end =
9982 &ctxt->input->buf->buffer->content[
9983 ctxt->input->buf->buffer->use];
9984 }
9985 avail = ctxt->input->buf->buffer->use -
9986 (ctxt->input->cur - ctxt->input->base);
9987 }
Owen Taylor3473f882001-02-23 17:55:21 +00009988 if (avail < 1)
9989 goto done;
9990 switch (ctxt->instate) {
9991 case XML_PARSER_EOF:
9992 /*
9993 * Document parsing is done !
9994 */
9995 goto done;
9996 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009997 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9998 xmlChar start[4];
9999 xmlCharEncoding enc;
10000
10001 /*
10002 * Very first chars read from the document flow.
10003 */
10004 if (avail < 4)
10005 goto done;
10006
10007 /*
10008 * Get the 4 first bytes and decode the charset
10009 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010010 * plug some encoding conversion routines,
10011 * else xmlSwitchEncoding will set to (default)
10012 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010013 */
10014 start[0] = RAW;
10015 start[1] = NXT(1);
10016 start[2] = NXT(2);
10017 start[3] = NXT(3);
10018 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010019 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010020 break;
10021 }
Owen Taylor3473f882001-02-23 17:55:21 +000010022
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010023 if (avail < 2)
10024 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010025 cur = ctxt->input->cur[0];
10026 next = ctxt->input->cur[1];
10027 if (cur == 0) {
10028 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10029 ctxt->sax->setDocumentLocator(ctxt->userData,
10030 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010031 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010032 ctxt->instate = XML_PARSER_EOF;
10033#ifdef DEBUG_PUSH
10034 xmlGenericError(xmlGenericErrorContext,
10035 "PP: entering EOF\n");
10036#endif
10037 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10038 ctxt->sax->endDocument(ctxt->userData);
10039 goto done;
10040 }
10041 if ((cur == '<') && (next == '?')) {
10042 /* PI or XML decl */
10043 if (avail < 5) return(ret);
10044 if ((!terminate) &&
10045 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10046 return(ret);
10047 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10048 ctxt->sax->setDocumentLocator(ctxt->userData,
10049 &xmlDefaultSAXLocator);
10050 if ((ctxt->input->cur[2] == 'x') &&
10051 (ctxt->input->cur[3] == 'm') &&
10052 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010053 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010054 ret += 5;
10055#ifdef DEBUG_PUSH
10056 xmlGenericError(xmlGenericErrorContext,
10057 "PP: Parsing XML Decl\n");
10058#endif
10059 xmlParseXMLDecl(ctxt);
10060 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10061 /*
10062 * The XML REC instructs us to stop parsing right
10063 * here
10064 */
10065 ctxt->instate = XML_PARSER_EOF;
10066 return(0);
10067 }
10068 ctxt->standalone = ctxt->input->standalone;
10069 if ((ctxt->encoding == NULL) &&
10070 (ctxt->input->encoding != NULL))
10071 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10072 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10073 (!ctxt->disableSAX))
10074 ctxt->sax->startDocument(ctxt->userData);
10075 ctxt->instate = XML_PARSER_MISC;
10076#ifdef DEBUG_PUSH
10077 xmlGenericError(xmlGenericErrorContext,
10078 "PP: entering MISC\n");
10079#endif
10080 } else {
10081 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10082 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10083 (!ctxt->disableSAX))
10084 ctxt->sax->startDocument(ctxt->userData);
10085 ctxt->instate = XML_PARSER_MISC;
10086#ifdef DEBUG_PUSH
10087 xmlGenericError(xmlGenericErrorContext,
10088 "PP: entering MISC\n");
10089#endif
10090 }
10091 } else {
10092 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10093 ctxt->sax->setDocumentLocator(ctxt->userData,
10094 &xmlDefaultSAXLocator);
10095 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010096 if (ctxt->version == NULL) {
10097 xmlErrMemory(ctxt, NULL);
10098 break;
10099 }
Owen Taylor3473f882001-02-23 17:55:21 +000010100 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10101 (!ctxt->disableSAX))
10102 ctxt->sax->startDocument(ctxt->userData);
10103 ctxt->instate = XML_PARSER_MISC;
10104#ifdef DEBUG_PUSH
10105 xmlGenericError(xmlGenericErrorContext,
10106 "PP: entering MISC\n");
10107#endif
10108 }
10109 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010110 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010111 const xmlChar *name;
10112 const xmlChar *prefix;
10113 const xmlChar *URI;
10114 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010115
10116 if ((avail < 2) && (ctxt->inputNr == 1))
10117 goto done;
10118 cur = ctxt->input->cur[0];
10119 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010120 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010121 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010122 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10123 ctxt->sax->endDocument(ctxt->userData);
10124 goto done;
10125 }
10126 if (!terminate) {
10127 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010128 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010129 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010130 goto done;
10131 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10132 goto done;
10133 }
10134 }
10135 if (ctxt->spaceNr == 0)
10136 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010137 else if (*ctxt->space == -2)
10138 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010139 else
10140 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010141#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010142 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010143#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010144 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010145#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010146 else
10147 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010148#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010149 if (name == NULL) {
10150 spacePop(ctxt);
10151 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010152 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10153 ctxt->sax->endDocument(ctxt->userData);
10154 goto done;
10155 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010156#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010157 /*
10158 * [ VC: Root Element Type ]
10159 * The Name in the document type declaration must match
10160 * the element type of the root element.
10161 */
10162 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010165#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010166
10167 /*
10168 * Check for an Empty Element.
10169 */
10170 if ((RAW == '/') && (NXT(1) == '>')) {
10171 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010172
10173 if (ctxt->sax2) {
10174 if ((ctxt->sax != NULL) &&
10175 (ctxt->sax->endElementNs != NULL) &&
10176 (!ctxt->disableSAX))
10177 ctxt->sax->endElementNs(ctxt->userData, name,
10178 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010179 if (ctxt->nsNr - nsNr > 0)
10180 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010181#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010182 } else {
10183 if ((ctxt->sax != NULL) &&
10184 (ctxt->sax->endElement != NULL) &&
10185 (!ctxt->disableSAX))
10186 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010187#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010188 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010189 spacePop(ctxt);
10190 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010191 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010192 } else {
10193 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010194 }
10195 break;
10196 }
10197 if (RAW == '>') {
10198 NEXT;
10199 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010200 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010201 "Couldn't find end of Start Tag %s\n",
10202 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010203 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010204 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010205 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010206 if (ctxt->sax2)
10207 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010208#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010209 else
10210 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010211#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010212
Daniel Veillarda880b122003-04-21 21:36:41 +000010213 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010214 break;
10215 }
10216 case XML_PARSER_CONTENT: {
10217 const xmlChar *test;
10218 unsigned int cons;
10219 if ((avail < 2) && (ctxt->inputNr == 1))
10220 goto done;
10221 cur = ctxt->input->cur[0];
10222 next = ctxt->input->cur[1];
10223
10224 test = CUR_PTR;
10225 cons = ctxt->input->consumed;
10226 if ((cur == '<') && (next == '/')) {
10227 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010228 break;
10229 } else if ((cur == '<') && (next == '?')) {
10230 if ((!terminate) &&
10231 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10232 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010233 xmlParsePI(ctxt);
10234 } else if ((cur == '<') && (next != '!')) {
10235 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010236 break;
10237 } else if ((cur == '<') && (next == '!') &&
10238 (ctxt->input->cur[2] == '-') &&
10239 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010240 int term;
10241
10242 if (avail < 4)
10243 goto done;
10244 ctxt->input->cur += 4;
10245 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10246 ctxt->input->cur -= 4;
10247 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010248 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010249 xmlParseComment(ctxt);
10250 ctxt->instate = XML_PARSER_CONTENT;
10251 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10252 (ctxt->input->cur[2] == '[') &&
10253 (ctxt->input->cur[3] == 'C') &&
10254 (ctxt->input->cur[4] == 'D') &&
10255 (ctxt->input->cur[5] == 'A') &&
10256 (ctxt->input->cur[6] == 'T') &&
10257 (ctxt->input->cur[7] == 'A') &&
10258 (ctxt->input->cur[8] == '[')) {
10259 SKIP(9);
10260 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010261 break;
10262 } else if ((cur == '<') && (next == '!') &&
10263 (avail < 9)) {
10264 goto done;
10265 } else if (cur == '&') {
10266 if ((!terminate) &&
10267 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10268 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010269 xmlParseReference(ctxt);
10270 } else {
10271 /* TODO Avoid the extra copy, handle directly !!! */
10272 /*
10273 * Goal of the following test is:
10274 * - minimize calls to the SAX 'character' callback
10275 * when they are mergeable
10276 * - handle an problem for isBlank when we only parse
10277 * a sequence of blank chars and the next one is
10278 * not available to check against '<' presence.
10279 * - tries to homogenize the differences in SAX
10280 * callbacks between the push and pull versions
10281 * of the parser.
10282 */
10283 if ((ctxt->inputNr == 1) &&
10284 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10285 if (!terminate) {
10286 if (ctxt->progressive) {
10287 if ((lastlt == NULL) ||
10288 (ctxt->input->cur > lastlt))
10289 goto done;
10290 } else if (xmlParseLookupSequence(ctxt,
10291 '<', 0, 0) < 0) {
10292 goto done;
10293 }
10294 }
10295 }
10296 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010297 xmlParseCharData(ctxt, 0);
10298 }
10299 /*
10300 * Pop-up of finished entities.
10301 */
10302 while ((RAW == 0) && (ctxt->inputNr > 1))
10303 xmlPopInput(ctxt);
10304 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010305 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10306 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010307 ctxt->instate = XML_PARSER_EOF;
10308 break;
10309 }
10310 break;
10311 }
10312 case XML_PARSER_END_TAG:
10313 if (avail < 2)
10314 goto done;
10315 if (!terminate) {
10316 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010317 /* > can be found unescaped in attribute values */
10318 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010319 goto done;
10320 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10321 goto done;
10322 }
10323 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010324 if (ctxt->sax2) {
10325 xmlParseEndTag2(ctxt,
10326 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10327 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010328 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010329 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010330 }
10331#ifdef LIBXML_SAX1_ENABLED
10332 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010333 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010334#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010335 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010336 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010337 } else {
10338 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010339 }
10340 break;
10341 case XML_PARSER_CDATA_SECTION: {
10342 /*
10343 * The Push mode need to have the SAX callback for
10344 * cdataBlock merge back contiguous callbacks.
10345 */
10346 int base;
10347
10348 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10349 if (base < 0) {
10350 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010351 int tmp;
10352
10353 tmp = xmlCheckCdataPush(ctxt->input->cur,
10354 XML_PARSER_BIG_BUFFER_SIZE);
10355 if (tmp < 0) {
10356 tmp = -tmp;
10357 ctxt->input->cur += tmp;
10358 goto encoding_error;
10359 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010360 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10361 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010362 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010363 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010364 else if (ctxt->sax->characters != NULL)
10365 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010366 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010367 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010368 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010369 ctxt->checkIndex = 0;
10370 }
10371 goto done;
10372 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010373 int tmp;
10374
10375 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10376 if ((tmp < 0) || (tmp != base)) {
10377 tmp = -tmp;
10378 ctxt->input->cur += tmp;
10379 goto encoding_error;
10380 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010381 if ((ctxt->sax != NULL) && (base == 0) &&
10382 (ctxt->sax->cdataBlock != NULL) &&
10383 (!ctxt->disableSAX)) {
10384 /*
10385 * Special case to provide identical behaviour
10386 * between pull and push parsers on enpty CDATA
10387 * sections
10388 */
10389 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10390 (!strncmp((const char *)&ctxt->input->cur[-9],
10391 "<![CDATA[", 9)))
10392 ctxt->sax->cdataBlock(ctxt->userData,
10393 BAD_CAST "", 0);
10394 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010395 (!ctxt->disableSAX)) {
10396 if (ctxt->sax->cdataBlock != NULL)
10397 ctxt->sax->cdataBlock(ctxt->userData,
10398 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010399 else if (ctxt->sax->characters != NULL)
10400 ctxt->sax->characters(ctxt->userData,
10401 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010402 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010403 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010404 ctxt->checkIndex = 0;
10405 ctxt->instate = XML_PARSER_CONTENT;
10406#ifdef DEBUG_PUSH
10407 xmlGenericError(xmlGenericErrorContext,
10408 "PP: entering CONTENT\n");
10409#endif
10410 }
10411 break;
10412 }
Owen Taylor3473f882001-02-23 17:55:21 +000010413 case XML_PARSER_MISC:
10414 SKIP_BLANKS;
10415 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010416 avail = ctxt->input->length -
10417 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010418 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010419 avail = ctxt->input->buf->buffer->use -
10420 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010421 if (avail < 2)
10422 goto done;
10423 cur = ctxt->input->cur[0];
10424 next = ctxt->input->cur[1];
10425 if ((cur == '<') && (next == '?')) {
10426 if ((!terminate) &&
10427 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10428 goto done;
10429#ifdef DEBUG_PUSH
10430 xmlGenericError(xmlGenericErrorContext,
10431 "PP: Parsing PI\n");
10432#endif
10433 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010434 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010435 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010436 (ctxt->input->cur[2] == '-') &&
10437 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010438 if ((!terminate) &&
10439 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10440 goto done;
10441#ifdef DEBUG_PUSH
10442 xmlGenericError(xmlGenericErrorContext,
10443 "PP: Parsing Comment\n");
10444#endif
10445 xmlParseComment(ctxt);
10446 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010447 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010448 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010449 (ctxt->input->cur[2] == 'D') &&
10450 (ctxt->input->cur[3] == 'O') &&
10451 (ctxt->input->cur[4] == 'C') &&
10452 (ctxt->input->cur[5] == 'T') &&
10453 (ctxt->input->cur[6] == 'Y') &&
10454 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010455 (ctxt->input->cur[8] == 'E')) {
10456 if ((!terminate) &&
10457 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10458 goto done;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: Parsing internal subset\n");
10462#endif
10463 ctxt->inSubset = 1;
10464 xmlParseDocTypeDecl(ctxt);
10465 if (RAW == '[') {
10466 ctxt->instate = XML_PARSER_DTD;
10467#ifdef DEBUG_PUSH
10468 xmlGenericError(xmlGenericErrorContext,
10469 "PP: entering DTD\n");
10470#endif
10471 } else {
10472 /*
10473 * Create and update the external subset.
10474 */
10475 ctxt->inSubset = 2;
10476 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10477 (ctxt->sax->externalSubset != NULL))
10478 ctxt->sax->externalSubset(ctxt->userData,
10479 ctxt->intSubName, ctxt->extSubSystem,
10480 ctxt->extSubURI);
10481 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010482 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010483 ctxt->instate = XML_PARSER_PROLOG;
10484#ifdef DEBUG_PUSH
10485 xmlGenericError(xmlGenericErrorContext,
10486 "PP: entering PROLOG\n");
10487#endif
10488 }
10489 } else if ((cur == '<') && (next == '!') &&
10490 (avail < 9)) {
10491 goto done;
10492 } else {
10493 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010494 ctxt->progressive = 1;
10495 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010496#ifdef DEBUG_PUSH
10497 xmlGenericError(xmlGenericErrorContext,
10498 "PP: entering START_TAG\n");
10499#endif
10500 }
10501 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010502 case XML_PARSER_PROLOG:
10503 SKIP_BLANKS;
10504 if (ctxt->input->buf == NULL)
10505 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10506 else
10507 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10508 if (avail < 2)
10509 goto done;
10510 cur = ctxt->input->cur[0];
10511 next = ctxt->input->cur[1];
10512 if ((cur == '<') && (next == '?')) {
10513 if ((!terminate) &&
10514 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10515 goto done;
10516#ifdef DEBUG_PUSH
10517 xmlGenericError(xmlGenericErrorContext,
10518 "PP: Parsing PI\n");
10519#endif
10520 xmlParsePI(ctxt);
10521 } else if ((cur == '<') && (next == '!') &&
10522 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10523 if ((!terminate) &&
10524 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10525 goto done;
10526#ifdef DEBUG_PUSH
10527 xmlGenericError(xmlGenericErrorContext,
10528 "PP: Parsing Comment\n");
10529#endif
10530 xmlParseComment(ctxt);
10531 ctxt->instate = XML_PARSER_PROLOG;
10532 } else if ((cur == '<') && (next == '!') &&
10533 (avail < 4)) {
10534 goto done;
10535 } else {
10536 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010537 if (ctxt->progressive == 0)
10538 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010539 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010540#ifdef DEBUG_PUSH
10541 xmlGenericError(xmlGenericErrorContext,
10542 "PP: entering START_TAG\n");
10543#endif
10544 }
10545 break;
10546 case XML_PARSER_EPILOG:
10547 SKIP_BLANKS;
10548 if (ctxt->input->buf == NULL)
10549 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10550 else
10551 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10552 if (avail < 2)
10553 goto done;
10554 cur = ctxt->input->cur[0];
10555 next = ctxt->input->cur[1];
10556 if ((cur == '<') && (next == '?')) {
10557 if ((!terminate) &&
10558 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10559 goto done;
10560#ifdef DEBUG_PUSH
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: Parsing PI\n");
10563#endif
10564 xmlParsePI(ctxt);
10565 ctxt->instate = XML_PARSER_EPILOG;
10566 } else if ((cur == '<') && (next == '!') &&
10567 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10568 if ((!terminate) &&
10569 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10570 goto done;
10571#ifdef DEBUG_PUSH
10572 xmlGenericError(xmlGenericErrorContext,
10573 "PP: Parsing Comment\n");
10574#endif
10575 xmlParseComment(ctxt);
10576 ctxt->instate = XML_PARSER_EPILOG;
10577 } else if ((cur == '<') && (next == '!') &&
10578 (avail < 4)) {
10579 goto done;
10580 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010581 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010582 ctxt->instate = XML_PARSER_EOF;
10583#ifdef DEBUG_PUSH
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: entering EOF\n");
10586#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010587 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010588 ctxt->sax->endDocument(ctxt->userData);
10589 goto done;
10590 }
10591 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010592 case XML_PARSER_DTD: {
10593 /*
10594 * Sorry but progressive parsing of the internal subset
10595 * is not expected to be supported. We first check that
10596 * the full content of the internal subset is available and
10597 * the parsing is launched only at that point.
10598 * Internal subset ends up with "']' S? '>'" in an unescaped
10599 * section and not in a ']]>' sequence which are conditional
10600 * sections (whoever argued to keep that crap in XML deserve
10601 * a place in hell !).
10602 */
10603 int base, i;
10604 xmlChar *buf;
10605 xmlChar quote = 0;
10606
10607 base = ctxt->input->cur - ctxt->input->base;
10608 if (base < 0) return(0);
10609 if (ctxt->checkIndex > base)
10610 base = ctxt->checkIndex;
10611 buf = ctxt->input->buf->buffer->content;
10612 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10613 base++) {
10614 if (quote != 0) {
10615 if (buf[base] == quote)
10616 quote = 0;
10617 continue;
10618 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010619 if ((quote == 0) && (buf[base] == '<')) {
10620 int found = 0;
10621 /* special handling of comments */
10622 if (((unsigned int) base + 4 <
10623 ctxt->input->buf->buffer->use) &&
10624 (buf[base + 1] == '!') &&
10625 (buf[base + 2] == '-') &&
10626 (buf[base + 3] == '-')) {
10627 for (;(unsigned int) base + 3 <
10628 ctxt->input->buf->buffer->use; base++) {
10629 if ((buf[base] == '-') &&
10630 (buf[base + 1] == '-') &&
10631 (buf[base + 2] == '>')) {
10632 found = 1;
10633 base += 2;
10634 break;
10635 }
10636 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010637 if (!found) {
10638#if 0
10639 fprintf(stderr, "unfinished comment\n");
10640#endif
10641 break; /* for */
10642 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010643 continue;
10644 }
10645 }
Owen Taylor3473f882001-02-23 17:55:21 +000010646 if (buf[base] == '"') {
10647 quote = '"';
10648 continue;
10649 }
10650 if (buf[base] == '\'') {
10651 quote = '\'';
10652 continue;
10653 }
10654 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010655#if 0
10656 fprintf(stderr, "%c%c%c%c: ", buf[base],
10657 buf[base + 1], buf[base + 2], buf[base + 3]);
10658#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010659 if ((unsigned int) base +1 >=
10660 ctxt->input->buf->buffer->use)
10661 break;
10662 if (buf[base + 1] == ']') {
10663 /* conditional crap, skip both ']' ! */
10664 base++;
10665 continue;
10666 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010667 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010668 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10669 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010670 if (buf[base + i] == '>') {
10671#if 0
10672 fprintf(stderr, "found\n");
10673#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010674 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010675 }
10676 if (!IS_BLANK_CH(buf[base + i])) {
10677#if 0
10678 fprintf(stderr, "not found\n");
10679#endif
10680 goto not_end_of_int_subset;
10681 }
Owen Taylor3473f882001-02-23 17:55:21 +000010682 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010683#if 0
10684 fprintf(stderr, "end of stream\n");
10685#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010686 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010687
Owen Taylor3473f882001-02-23 17:55:21 +000010688 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010689not_end_of_int_subset:
10690 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010691 }
10692 /*
10693 * We didn't found the end of the Internal subset
10694 */
Owen Taylor3473f882001-02-23 17:55:21 +000010695#ifdef DEBUG_PUSH
10696 if (next == 0)
10697 xmlGenericError(xmlGenericErrorContext,
10698 "PP: lookup of int subset end filed\n");
10699#endif
10700 goto done;
10701
10702found_end_int_subset:
10703 xmlParseInternalSubset(ctxt);
10704 ctxt->inSubset = 2;
10705 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10706 (ctxt->sax->externalSubset != NULL))
10707 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10708 ctxt->extSubSystem, ctxt->extSubURI);
10709 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010710 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010711 ctxt->instate = XML_PARSER_PROLOG;
10712 ctxt->checkIndex = 0;
10713#ifdef DEBUG_PUSH
10714 xmlGenericError(xmlGenericErrorContext,
10715 "PP: entering PROLOG\n");
10716#endif
10717 break;
10718 }
10719 case XML_PARSER_COMMENT:
10720 xmlGenericError(xmlGenericErrorContext,
10721 "PP: internal error, state == COMMENT\n");
10722 ctxt->instate = XML_PARSER_CONTENT;
10723#ifdef DEBUG_PUSH
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: entering CONTENT\n");
10726#endif
10727 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010728 case XML_PARSER_IGNORE:
10729 xmlGenericError(xmlGenericErrorContext,
10730 "PP: internal error, state == IGNORE");
10731 ctxt->instate = XML_PARSER_DTD;
10732#ifdef DEBUG_PUSH
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: entering DTD\n");
10735#endif
10736 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010737 case XML_PARSER_PI:
10738 xmlGenericError(xmlGenericErrorContext,
10739 "PP: internal error, state == PI\n");
10740 ctxt->instate = XML_PARSER_CONTENT;
10741#ifdef DEBUG_PUSH
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: entering CONTENT\n");
10744#endif
10745 break;
10746 case XML_PARSER_ENTITY_DECL:
10747 xmlGenericError(xmlGenericErrorContext,
10748 "PP: internal error, state == ENTITY_DECL\n");
10749 ctxt->instate = XML_PARSER_DTD;
10750#ifdef DEBUG_PUSH
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: entering DTD\n");
10753#endif
10754 break;
10755 case XML_PARSER_ENTITY_VALUE:
10756 xmlGenericError(xmlGenericErrorContext,
10757 "PP: internal error, state == ENTITY_VALUE\n");
10758 ctxt->instate = XML_PARSER_CONTENT;
10759#ifdef DEBUG_PUSH
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: entering DTD\n");
10762#endif
10763 break;
10764 case XML_PARSER_ATTRIBUTE_VALUE:
10765 xmlGenericError(xmlGenericErrorContext,
10766 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10767 ctxt->instate = XML_PARSER_START_TAG;
10768#ifdef DEBUG_PUSH
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: entering START_TAG\n");
10771#endif
10772 break;
10773 case XML_PARSER_SYSTEM_LITERAL:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: internal error, state == SYSTEM_LITERAL\n");
10776 ctxt->instate = XML_PARSER_START_TAG;
10777#ifdef DEBUG_PUSH
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: entering START_TAG\n");
10780#endif
10781 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010782 case XML_PARSER_PUBLIC_LITERAL:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: internal error, state == PUBLIC_LITERAL\n");
10785 ctxt->instate = XML_PARSER_START_TAG;
10786#ifdef DEBUG_PUSH
10787 xmlGenericError(xmlGenericErrorContext,
10788 "PP: entering START_TAG\n");
10789#endif
10790 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010791 }
10792 }
10793done:
10794#ifdef DEBUG_PUSH
10795 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10796#endif
10797 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010798encoding_error:
10799 {
10800 char buffer[150];
10801
10802 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10803 ctxt->input->cur[0], ctxt->input->cur[1],
10804 ctxt->input->cur[2], ctxt->input->cur[3]);
10805 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10806 "Input is not proper UTF-8, indicate encoding !\n%s",
10807 BAD_CAST buffer, NULL);
10808 }
10809 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010810}
10811
10812/**
Owen Taylor3473f882001-02-23 17:55:21 +000010813 * xmlParseChunk:
10814 * @ctxt: an XML parser context
10815 * @chunk: an char array
10816 * @size: the size in byte of the chunk
10817 * @terminate: last chunk indicator
10818 *
10819 * Parse a Chunk of memory
10820 *
10821 * Returns zero if no error, the xmlParserErrors otherwise.
10822 */
10823int
10824xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10825 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010826 int end_in_lf = 0;
10827
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010828 if (ctxt == NULL)
10829 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010830 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010831 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010832 if (ctxt->instate == XML_PARSER_START)
10833 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010834 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10835 (chunk[size - 1] == '\r')) {
10836 end_in_lf = 1;
10837 size--;
10838 }
Owen Taylor3473f882001-02-23 17:55:21 +000010839 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10840 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10841 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10842 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010843 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010844
William M. Bracka3215c72004-07-31 16:24:01 +000010845 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10846 if (res < 0) {
10847 ctxt->errNo = XML_PARSER_EOF;
10848 ctxt->disableSAX = 1;
10849 return (XML_PARSER_EOF);
10850 }
Owen Taylor3473f882001-02-23 17:55:21 +000010851 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10852 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010853 ctxt->input->end =
10854 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010855#ifdef DEBUG_PUSH
10856 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10857#endif
10858
Owen Taylor3473f882001-02-23 17:55:21 +000010859 } else if (ctxt->instate != XML_PARSER_EOF) {
10860 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10861 xmlParserInputBufferPtr in = ctxt->input->buf;
10862 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10863 (in->raw != NULL)) {
10864 int nbchars;
10865
10866 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10867 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010868 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010869 xmlGenericError(xmlGenericErrorContext,
10870 "xmlParseChunk: encoder error\n");
10871 return(XML_ERR_INVALID_ENCODING);
10872 }
10873 }
10874 }
10875 }
10876 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010877 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10878 (ctxt->input->buf != NULL)) {
10879 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10880 }
Daniel Veillard14412512005-01-21 23:53:26 +000010881 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010882 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010883 if (terminate) {
10884 /*
10885 * Check for termination
10886 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010887 int avail = 0;
10888
10889 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010890 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010891 avail = ctxt->input->length -
10892 (ctxt->input->cur - ctxt->input->base);
10893 else
10894 avail = ctxt->input->buf->buffer->use -
10895 (ctxt->input->cur - ctxt->input->base);
10896 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010897
Owen Taylor3473f882001-02-23 17:55:21 +000010898 if ((ctxt->instate != XML_PARSER_EOF) &&
10899 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010900 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010901 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010902 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010903 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010904 }
Owen Taylor3473f882001-02-23 17:55:21 +000010905 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010906 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010907 ctxt->sax->endDocument(ctxt->userData);
10908 }
10909 ctxt->instate = XML_PARSER_EOF;
10910 }
10911 return((xmlParserErrors) ctxt->errNo);
10912}
10913
10914/************************************************************************
10915 * *
10916 * I/O front end functions to the parser *
10917 * *
10918 ************************************************************************/
10919
10920/**
Owen Taylor3473f882001-02-23 17:55:21 +000010921 * xmlCreatePushParserCtxt:
10922 * @sax: a SAX handler
10923 * @user_data: The user data returned on SAX callbacks
10924 * @chunk: a pointer to an array of chars
10925 * @size: number of chars in the array
10926 * @filename: an optional file name or URI
10927 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010928 * Create a parser context for using the XML parser in push mode.
10929 * If @buffer and @size are non-NULL, the data is used to detect
10930 * the encoding. The remaining characters will be parsed so they
10931 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010932 * To allow content encoding detection, @size should be >= 4
10933 * The value of @filename is used for fetching external entities
10934 * and error/warning reports.
10935 *
10936 * Returns the new parser context or NULL
10937 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010938
Owen Taylor3473f882001-02-23 17:55:21 +000010939xmlParserCtxtPtr
10940xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10941 const char *chunk, int size, const char *filename) {
10942 xmlParserCtxtPtr ctxt;
10943 xmlParserInputPtr inputStream;
10944 xmlParserInputBufferPtr buf;
10945 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10946
10947 /*
10948 * plug some encoding conversion routines
10949 */
10950 if ((chunk != NULL) && (size >= 4))
10951 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10952
10953 buf = xmlAllocParserInputBuffer(enc);
10954 if (buf == NULL) return(NULL);
10955
10956 ctxt = xmlNewParserCtxt();
10957 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010958 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010959 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010960 return(NULL);
10961 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010962 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010963 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10964 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010965 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010966 xmlFreeParserInputBuffer(buf);
10967 xmlFreeParserCtxt(ctxt);
10968 return(NULL);
10969 }
Owen Taylor3473f882001-02-23 17:55:21 +000010970 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010971#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010972 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010973#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010974 xmlFree(ctxt->sax);
10975 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10976 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010977 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010978 xmlFreeParserInputBuffer(buf);
10979 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010980 return(NULL);
10981 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010982 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10983 if (sax->initialized == XML_SAX2_MAGIC)
10984 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10985 else
10986 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010987 if (user_data != NULL)
10988 ctxt->userData = user_data;
10989 }
10990 if (filename == NULL) {
10991 ctxt->directory = NULL;
10992 } else {
10993 ctxt->directory = xmlParserGetDirectory(filename);
10994 }
10995
10996 inputStream = xmlNewInputStream(ctxt);
10997 if (inputStream == NULL) {
10998 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010999 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011000 return(NULL);
11001 }
11002
11003 if (filename == NULL)
11004 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011005 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011006 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011007 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011008 if (inputStream->filename == NULL) {
11009 xmlFreeParserCtxt(ctxt);
11010 xmlFreeParserInputBuffer(buf);
11011 return(NULL);
11012 }
11013 }
Owen Taylor3473f882001-02-23 17:55:21 +000011014 inputStream->buf = buf;
11015 inputStream->base = inputStream->buf->buffer->content;
11016 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011017 inputStream->end =
11018 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011019
11020 inputPush(ctxt, inputStream);
11021
William M. Brack3a1cd212005-02-11 14:35:54 +000011022 /*
11023 * If the caller didn't provide an initial 'chunk' for determining
11024 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11025 * that it can be automatically determined later
11026 */
11027 if ((size == 0) || (chunk == NULL)) {
11028 ctxt->charset = XML_CHAR_ENCODING_NONE;
11029 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011030 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11031 int cur = ctxt->input->cur - ctxt->input->base;
11032
Owen Taylor3473f882001-02-23 17:55:21 +000011033 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011034
11035 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11036 ctxt->input->cur = ctxt->input->base + cur;
11037 ctxt->input->end =
11038 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011039#ifdef DEBUG_PUSH
11040 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11041#endif
11042 }
11043
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011044 if (enc != XML_CHAR_ENCODING_NONE) {
11045 xmlSwitchEncoding(ctxt, enc);
11046 }
11047
Owen Taylor3473f882001-02-23 17:55:21 +000011048 return(ctxt);
11049}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011050#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011051
11052/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011053 * xmlStopParser:
11054 * @ctxt: an XML parser context
11055 *
11056 * Blocks further parser processing
11057 */
11058void
11059xmlStopParser(xmlParserCtxtPtr ctxt) {
11060 if (ctxt == NULL)
11061 return;
11062 ctxt->instate = XML_PARSER_EOF;
11063 ctxt->disableSAX = 1;
11064 if (ctxt->input != NULL) {
11065 ctxt->input->cur = BAD_CAST"";
11066 ctxt->input->base = ctxt->input->cur;
11067 }
11068}
11069
11070/**
Owen Taylor3473f882001-02-23 17:55:21 +000011071 * xmlCreateIOParserCtxt:
11072 * @sax: a SAX handler
11073 * @user_data: The user data returned on SAX callbacks
11074 * @ioread: an I/O read function
11075 * @ioclose: an I/O close function
11076 * @ioctx: an I/O handler
11077 * @enc: the charset encoding if known
11078 *
11079 * Create a parser context for using the XML parser with an existing
11080 * I/O stream
11081 *
11082 * Returns the new parser context or NULL
11083 */
11084xmlParserCtxtPtr
11085xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11086 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11087 void *ioctx, xmlCharEncoding enc) {
11088 xmlParserCtxtPtr ctxt;
11089 xmlParserInputPtr inputStream;
11090 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011091
11092 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011093
11094 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11095 if (buf == NULL) return(NULL);
11096
11097 ctxt = xmlNewParserCtxt();
11098 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011099 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011100 return(NULL);
11101 }
11102 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011103#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011104 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011105#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011106 xmlFree(ctxt->sax);
11107 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11108 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011109 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011110 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011111 return(NULL);
11112 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011113 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11114 if (sax->initialized == XML_SAX2_MAGIC)
11115 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11116 else
11117 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011118 if (user_data != NULL)
11119 ctxt->userData = user_data;
11120 }
11121
11122 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11123 if (inputStream == NULL) {
11124 xmlFreeParserCtxt(ctxt);
11125 return(NULL);
11126 }
11127 inputPush(ctxt, inputStream);
11128
11129 return(ctxt);
11130}
11131
Daniel Veillard4432df22003-09-28 18:58:27 +000011132#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011133/************************************************************************
11134 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011135 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011136 * *
11137 ************************************************************************/
11138
11139/**
11140 * xmlIOParseDTD:
11141 * @sax: the SAX handler block or NULL
11142 * @input: an Input Buffer
11143 * @enc: the charset encoding if known
11144 *
11145 * Load and parse a DTD
11146 *
11147 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011148 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011149 */
11150
11151xmlDtdPtr
11152xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11153 xmlCharEncoding enc) {
11154 xmlDtdPtr ret = NULL;
11155 xmlParserCtxtPtr ctxt;
11156 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011157 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011158
11159 if (input == NULL)
11160 return(NULL);
11161
11162 ctxt = xmlNewParserCtxt();
11163 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011164 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011165 return(NULL);
11166 }
11167
11168 /*
11169 * Set-up the SAX context
11170 */
11171 if (sax != NULL) {
11172 if (ctxt->sax != NULL)
11173 xmlFree(ctxt->sax);
11174 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011175 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011176 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011177 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011178
11179 /*
11180 * generate a parser input from the I/O handler
11181 */
11182
Daniel Veillard43caefb2003-12-07 19:32:22 +000011183 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011184 if (pinput == NULL) {
11185 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011186 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011187 xmlFreeParserCtxt(ctxt);
11188 return(NULL);
11189 }
11190
11191 /*
11192 * plug some encoding conversion routines here.
11193 */
11194 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011195 if (enc != XML_CHAR_ENCODING_NONE) {
11196 xmlSwitchEncoding(ctxt, enc);
11197 }
Owen Taylor3473f882001-02-23 17:55:21 +000011198
11199 pinput->filename = NULL;
11200 pinput->line = 1;
11201 pinput->col = 1;
11202 pinput->base = ctxt->input->cur;
11203 pinput->cur = ctxt->input->cur;
11204 pinput->free = NULL;
11205
11206 /*
11207 * let's parse that entity knowing it's an external subset.
11208 */
11209 ctxt->inSubset = 2;
11210 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11211 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11212 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011213
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011214 if ((enc == XML_CHAR_ENCODING_NONE) &&
11215 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011216 /*
11217 * Get the 4 first bytes and decode the charset
11218 * if enc != XML_CHAR_ENCODING_NONE
11219 * plug some encoding conversion routines.
11220 */
11221 start[0] = RAW;
11222 start[1] = NXT(1);
11223 start[2] = NXT(2);
11224 start[3] = NXT(3);
11225 enc = xmlDetectCharEncoding(start, 4);
11226 if (enc != XML_CHAR_ENCODING_NONE) {
11227 xmlSwitchEncoding(ctxt, enc);
11228 }
11229 }
11230
Owen Taylor3473f882001-02-23 17:55:21 +000011231 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11232
11233 if (ctxt->myDoc != NULL) {
11234 if (ctxt->wellFormed) {
11235 ret = ctxt->myDoc->extSubset;
11236 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011237 if (ret != NULL) {
11238 xmlNodePtr tmp;
11239
11240 ret->doc = NULL;
11241 tmp = ret->children;
11242 while (tmp != NULL) {
11243 tmp->doc = NULL;
11244 tmp = tmp->next;
11245 }
11246 }
Owen Taylor3473f882001-02-23 17:55:21 +000011247 } else {
11248 ret = NULL;
11249 }
11250 xmlFreeDoc(ctxt->myDoc);
11251 ctxt->myDoc = NULL;
11252 }
11253 if (sax != NULL) ctxt->sax = NULL;
11254 xmlFreeParserCtxt(ctxt);
11255
11256 return(ret);
11257}
11258
11259/**
11260 * xmlSAXParseDTD:
11261 * @sax: the SAX handler block
11262 * @ExternalID: a NAME* containing the External ID of the DTD
11263 * @SystemID: a NAME* containing the URL to the DTD
11264 *
11265 * Load and parse an external subset.
11266 *
11267 * Returns the resulting xmlDtdPtr or NULL in case of error.
11268 */
11269
11270xmlDtdPtr
11271xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11272 const xmlChar *SystemID) {
11273 xmlDtdPtr ret = NULL;
11274 xmlParserCtxtPtr ctxt;
11275 xmlParserInputPtr input = NULL;
11276 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011277 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011278
11279 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11280
11281 ctxt = xmlNewParserCtxt();
11282 if (ctxt == NULL) {
11283 return(NULL);
11284 }
11285
11286 /*
11287 * Set-up the SAX context
11288 */
11289 if (sax != NULL) {
11290 if (ctxt->sax != NULL)
11291 xmlFree(ctxt->sax);
11292 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011293 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011294 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011295
11296 /*
11297 * Canonicalise the system ID
11298 */
11299 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011300 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011301 xmlFreeParserCtxt(ctxt);
11302 return(NULL);
11303 }
Owen Taylor3473f882001-02-23 17:55:21 +000011304
11305 /*
11306 * Ask the Entity resolver to load the damn thing
11307 */
11308
11309 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011310 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11311 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011312 if (input == NULL) {
11313 if (sax != NULL) ctxt->sax = NULL;
11314 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011315 if (systemIdCanonic != NULL)
11316 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011317 return(NULL);
11318 }
11319
11320 /*
11321 * plug some encoding conversion routines here.
11322 */
11323 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011324 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11325 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11326 xmlSwitchEncoding(ctxt, enc);
11327 }
Owen Taylor3473f882001-02-23 17:55:21 +000011328
11329 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011330 input->filename = (char *) systemIdCanonic;
11331 else
11332 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011333 input->line = 1;
11334 input->col = 1;
11335 input->base = ctxt->input->cur;
11336 input->cur = ctxt->input->cur;
11337 input->free = NULL;
11338
11339 /*
11340 * let's parse that entity knowing it's an external subset.
11341 */
11342 ctxt->inSubset = 2;
11343 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11344 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11345 ExternalID, SystemID);
11346 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11347
11348 if (ctxt->myDoc != NULL) {
11349 if (ctxt->wellFormed) {
11350 ret = ctxt->myDoc->extSubset;
11351 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011352 if (ret != NULL) {
11353 xmlNodePtr tmp;
11354
11355 ret->doc = NULL;
11356 tmp = ret->children;
11357 while (tmp != NULL) {
11358 tmp->doc = NULL;
11359 tmp = tmp->next;
11360 }
11361 }
Owen Taylor3473f882001-02-23 17:55:21 +000011362 } else {
11363 ret = NULL;
11364 }
11365 xmlFreeDoc(ctxt->myDoc);
11366 ctxt->myDoc = NULL;
11367 }
11368 if (sax != NULL) ctxt->sax = NULL;
11369 xmlFreeParserCtxt(ctxt);
11370
11371 return(ret);
11372}
11373
Daniel Veillard4432df22003-09-28 18:58:27 +000011374
Owen Taylor3473f882001-02-23 17:55:21 +000011375/**
11376 * xmlParseDTD:
11377 * @ExternalID: a NAME* containing the External ID of the DTD
11378 * @SystemID: a NAME* containing the URL to the DTD
11379 *
11380 * Load and parse an external subset.
11381 *
11382 * Returns the resulting xmlDtdPtr or NULL in case of error.
11383 */
11384
11385xmlDtdPtr
11386xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11387 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11388}
Daniel Veillard4432df22003-09-28 18:58:27 +000011389#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011390
11391/************************************************************************
11392 * *
11393 * Front ends when parsing an Entity *
11394 * *
11395 ************************************************************************/
11396
11397/**
Owen Taylor3473f882001-02-23 17:55:21 +000011398 * xmlParseCtxtExternalEntity:
11399 * @ctx: the existing parsing context
11400 * @URL: the URL for the entity to load
11401 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011402 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011403 *
11404 * Parse an external general entity within an existing parsing context
11405 * An external general parsed entity is well-formed if it matches the
11406 * production labeled extParsedEnt.
11407 *
11408 * [78] extParsedEnt ::= TextDecl? content
11409 *
11410 * Returns 0 if the entity is well formed, -1 in case of args problem and
11411 * the parser error code otherwise
11412 */
11413
11414int
11415xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011416 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011417 xmlParserCtxtPtr ctxt;
11418 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011419 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011420 xmlSAXHandlerPtr oldsax = NULL;
11421 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011422 xmlChar start[4];
11423 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011424 xmlParserInputPtr inputStream;
11425 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011426
Daniel Veillardce682bc2004-11-05 17:22:25 +000011427 if (ctx == NULL) return(-1);
11428
Owen Taylor3473f882001-02-23 17:55:21 +000011429 if (ctx->depth > 40) {
11430 return(XML_ERR_ENTITY_LOOP);
11431 }
11432
Daniel Veillardcda96922001-08-21 10:56:31 +000011433 if (lst != NULL)
11434 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011435 if ((URL == NULL) && (ID == NULL))
11436 return(-1);
11437 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11438 return(-1);
11439
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011440 ctxt = xmlNewParserCtxt();
11441 if (ctxt == NULL) {
11442 return(-1);
11443 }
11444
Owen Taylor3473f882001-02-23 17:55:21 +000011445 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011446 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011447
11448 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11449 if (inputStream == NULL) {
11450 xmlFreeParserCtxt(ctxt);
11451 return(-1);
11452 }
11453
11454 inputPush(ctxt, inputStream);
11455
11456 if ((ctxt->directory == NULL) && (directory == NULL))
11457 directory = xmlParserGetDirectory((char *)URL);
11458 if ((ctxt->directory == NULL) && (directory != NULL))
11459 ctxt->directory = directory;
11460
Owen Taylor3473f882001-02-23 17:55:21 +000011461 oldsax = ctxt->sax;
11462 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011463 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011464 newDoc = xmlNewDoc(BAD_CAST "1.0");
11465 if (newDoc == NULL) {
11466 xmlFreeParserCtxt(ctxt);
11467 return(-1);
11468 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011469 if (ctx->myDoc->dict) {
11470 newDoc->dict = ctx->myDoc->dict;
11471 xmlDictReference(newDoc->dict);
11472 }
Owen Taylor3473f882001-02-23 17:55:21 +000011473 if (ctx->myDoc != NULL) {
11474 newDoc->intSubset = ctx->myDoc->intSubset;
11475 newDoc->extSubset = ctx->myDoc->extSubset;
11476 }
11477 if (ctx->myDoc->URL != NULL) {
11478 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11479 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011480 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11481 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011482 ctxt->sax = oldsax;
11483 xmlFreeParserCtxt(ctxt);
11484 newDoc->intSubset = NULL;
11485 newDoc->extSubset = NULL;
11486 xmlFreeDoc(newDoc);
11487 return(-1);
11488 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011489 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011490 nodePush(ctxt, newDoc->children);
11491 if (ctx->myDoc == NULL) {
11492 ctxt->myDoc = newDoc;
11493 } else {
11494 ctxt->myDoc = ctx->myDoc;
11495 newDoc->children->doc = ctx->myDoc;
11496 }
11497
Daniel Veillard87a764e2001-06-20 17:41:10 +000011498 /*
11499 * Get the 4 first bytes and decode the charset
11500 * if enc != XML_CHAR_ENCODING_NONE
11501 * plug some encoding conversion routines.
11502 */
11503 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011504 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11505 start[0] = RAW;
11506 start[1] = NXT(1);
11507 start[2] = NXT(2);
11508 start[3] = NXT(3);
11509 enc = xmlDetectCharEncoding(start, 4);
11510 if (enc != XML_CHAR_ENCODING_NONE) {
11511 xmlSwitchEncoding(ctxt, enc);
11512 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011513 }
11514
Owen Taylor3473f882001-02-23 17:55:21 +000011515 /*
11516 * Parse a possible text declaration first
11517 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011518 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011519 xmlParseTextDecl(ctxt);
11520 }
11521
11522 /*
11523 * Doing validity checking on chunk doesn't make sense
11524 */
11525 ctxt->instate = XML_PARSER_CONTENT;
11526 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011527 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011528 ctxt->loadsubset = ctx->loadsubset;
11529 ctxt->depth = ctx->depth + 1;
11530 ctxt->replaceEntities = ctx->replaceEntities;
11531 if (ctxt->validate) {
11532 ctxt->vctxt.error = ctx->vctxt.error;
11533 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011534 } else {
11535 ctxt->vctxt.error = NULL;
11536 ctxt->vctxt.warning = NULL;
11537 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011538 ctxt->vctxt.nodeTab = NULL;
11539 ctxt->vctxt.nodeNr = 0;
11540 ctxt->vctxt.nodeMax = 0;
11541 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011542 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11543 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011544 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11545 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11546 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011547 ctxt->dictNames = ctx->dictNames;
11548 ctxt->attsDefault = ctx->attsDefault;
11549 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011550 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011551
11552 xmlParseContent(ctxt);
11553
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011554 ctx->validate = ctxt->validate;
11555 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011556 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011557 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011558 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011559 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011560 }
11561 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011562 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011563 }
11564
11565 if (!ctxt->wellFormed) {
11566 if (ctxt->errNo == 0)
11567 ret = 1;
11568 else
11569 ret = ctxt->errNo;
11570 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011571 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011572 xmlNodePtr cur;
11573
11574 /*
11575 * Return the newly created nodeset after unlinking it from
11576 * they pseudo parent.
11577 */
11578 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011579 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011580 while (cur != NULL) {
11581 cur->parent = NULL;
11582 cur = cur->next;
11583 }
11584 newDoc->children->children = NULL;
11585 }
11586 ret = 0;
11587 }
11588 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011589 ctxt->dict = NULL;
11590 ctxt->attsDefault = NULL;
11591 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011592 xmlFreeParserCtxt(ctxt);
11593 newDoc->intSubset = NULL;
11594 newDoc->extSubset = NULL;
11595 xmlFreeDoc(newDoc);
11596
11597 return(ret);
11598}
11599
11600/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011601 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011602 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011603 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011604 * @sax: the SAX handler bloc (possibly NULL)
11605 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11606 * @depth: Used for loop detection, use 0
11607 * @URL: the URL for the entity to load
11608 * @ID: the System ID for the entity to load
11609 * @list: the return value for the set of parsed nodes
11610 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011611 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011612 *
11613 * Returns 0 if the entity is well formed, -1 in case of args problem and
11614 * the parser error code otherwise
11615 */
11616
Daniel Veillard7d515752003-09-26 19:12:37 +000011617static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011618xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11619 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011620 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011621 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011622 xmlParserCtxtPtr ctxt;
11623 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011624 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011625 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011626 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011627 xmlChar start[4];
11628 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011629
11630 if (depth > 40) {
11631 return(XML_ERR_ENTITY_LOOP);
11632 }
11633
11634
11635
11636 if (list != NULL)
11637 *list = NULL;
11638 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011639 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011640 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011641 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011642
11643
11644 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011645 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011646 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011647 if (oldctxt != NULL) {
11648 ctxt->_private = oldctxt->_private;
11649 ctxt->loadsubset = oldctxt->loadsubset;
11650 ctxt->validate = oldctxt->validate;
11651 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011652 ctxt->record_info = oldctxt->record_info;
11653 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11654 ctxt->node_seq.length = oldctxt->node_seq.length;
11655 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011656 } else {
11657 /*
11658 * Doing validity checking on chunk without context
11659 * doesn't make sense
11660 */
11661 ctxt->_private = NULL;
11662 ctxt->validate = 0;
11663 ctxt->external = 2;
11664 ctxt->loadsubset = 0;
11665 }
Owen Taylor3473f882001-02-23 17:55:21 +000011666 if (sax != NULL) {
11667 oldsax = ctxt->sax;
11668 ctxt->sax = sax;
11669 if (user_data != NULL)
11670 ctxt->userData = user_data;
11671 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011672 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011673 newDoc = xmlNewDoc(BAD_CAST "1.0");
11674 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011675 ctxt->node_seq.maximum = 0;
11676 ctxt->node_seq.length = 0;
11677 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011678 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011679 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011680 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011681 newDoc->intSubset = doc->intSubset;
11682 newDoc->extSubset = doc->extSubset;
11683 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011684 xmlDictReference(newDoc->dict);
11685
Owen Taylor3473f882001-02-23 17:55:21 +000011686 if (doc->URL != NULL) {
11687 newDoc->URL = xmlStrdup(doc->URL);
11688 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011689 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11690 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011691 if (sax != NULL)
11692 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011693 ctxt->node_seq.maximum = 0;
11694 ctxt->node_seq.length = 0;
11695 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011696 xmlFreeParserCtxt(ctxt);
11697 newDoc->intSubset = NULL;
11698 newDoc->extSubset = NULL;
11699 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011700 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011701 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011702 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011703 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011704 ctxt->myDoc = doc;
11705 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011706
Daniel Veillard87a764e2001-06-20 17:41:10 +000011707 /*
11708 * Get the 4 first bytes and decode the charset
11709 * if enc != XML_CHAR_ENCODING_NONE
11710 * plug some encoding conversion routines.
11711 */
11712 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011713 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11714 start[0] = RAW;
11715 start[1] = NXT(1);
11716 start[2] = NXT(2);
11717 start[3] = NXT(3);
11718 enc = xmlDetectCharEncoding(start, 4);
11719 if (enc != XML_CHAR_ENCODING_NONE) {
11720 xmlSwitchEncoding(ctxt, enc);
11721 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011722 }
11723
Owen Taylor3473f882001-02-23 17:55:21 +000011724 /*
11725 * Parse a possible text declaration first
11726 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011727 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011728 xmlParseTextDecl(ctxt);
11729 }
11730
Owen Taylor3473f882001-02-23 17:55:21 +000011731 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011732 ctxt->depth = depth;
11733
11734 xmlParseContent(ctxt);
11735
Daniel Veillard561b7f82002-03-20 21:55:57 +000011736 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011737 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011738 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011739 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011740 }
11741 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011742 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011743 }
11744
11745 if (!ctxt->wellFormed) {
11746 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011747 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011748 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011749 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011750 } else {
11751 if (list != NULL) {
11752 xmlNodePtr cur;
11753
11754 /*
11755 * Return the newly created nodeset after unlinking it from
11756 * they pseudo parent.
11757 */
11758 cur = newDoc->children->children;
11759 *list = cur;
11760 while (cur != NULL) {
11761 cur->parent = NULL;
11762 cur = cur->next;
11763 }
11764 newDoc->children->children = NULL;
11765 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011766 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011767 }
11768 if (sax != NULL)
11769 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011770 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11771 oldctxt->node_seq.length = ctxt->node_seq.length;
11772 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011773 ctxt->node_seq.maximum = 0;
11774 ctxt->node_seq.length = 0;
11775 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011776 xmlFreeParserCtxt(ctxt);
11777 newDoc->intSubset = NULL;
11778 newDoc->extSubset = NULL;
11779 xmlFreeDoc(newDoc);
11780
11781 return(ret);
11782}
11783
Daniel Veillard81273902003-09-30 00:43:48 +000011784#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011785/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011786 * xmlParseExternalEntity:
11787 * @doc: the document the chunk pertains to
11788 * @sax: the SAX handler bloc (possibly NULL)
11789 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11790 * @depth: Used for loop detection, use 0
11791 * @URL: the URL for the entity to load
11792 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011793 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011794 *
11795 * Parse an external general entity
11796 * An external general parsed entity is well-formed if it matches the
11797 * production labeled extParsedEnt.
11798 *
11799 * [78] extParsedEnt ::= TextDecl? content
11800 *
11801 * Returns 0 if the entity is well formed, -1 in case of args problem and
11802 * the parser error code otherwise
11803 */
11804
11805int
11806xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011807 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011808 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011809 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011810}
11811
11812/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011813 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011814 * @doc: the document the chunk pertains to
11815 * @sax: the SAX handler bloc (possibly NULL)
11816 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11817 * @depth: Used for loop detection, use 0
11818 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011819 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011820 *
11821 * Parse a well-balanced chunk of an XML document
11822 * called by the parser
11823 * The allowed sequence for the Well Balanced Chunk is the one defined by
11824 * the content production in the XML grammar:
11825 *
11826 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11827 *
11828 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11829 * the parser error code otherwise
11830 */
11831
11832int
11833xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011834 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011835 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11836 depth, string, lst, 0 );
11837}
Daniel Veillard81273902003-09-30 00:43:48 +000011838#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011839
11840/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011841 * xmlParseBalancedChunkMemoryInternal:
11842 * @oldctxt: the existing parsing context
11843 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11844 * @user_data: the user data field for the parser context
11845 * @lst: the return value for the set of parsed nodes
11846 *
11847 *
11848 * Parse a well-balanced chunk of an XML document
11849 * called by the parser
11850 * The allowed sequence for the Well Balanced Chunk is the one defined by
11851 * the content production in the XML grammar:
11852 *
11853 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11854 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011855 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11856 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011857 *
11858 * In case recover is set to 1, the nodelist will not be empty even if
11859 * the parsed chunk is not well balanced.
11860 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011861static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011862xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11863 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11864 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011865 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011866 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011867 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011868 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011869 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011870 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011871 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011872
11873 if (oldctxt->depth > 40) {
11874 return(XML_ERR_ENTITY_LOOP);
11875 }
11876
11877
11878 if (lst != NULL)
11879 *lst = NULL;
11880 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011881 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011882
11883 size = xmlStrlen(string);
11884
11885 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011886 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011887 if (user_data != NULL)
11888 ctxt->userData = user_data;
11889 else
11890 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011891 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11892 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011893 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11894 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11895 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011896
11897 oldsax = ctxt->sax;
11898 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011899 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011900 ctxt->replaceEntities = oldctxt->replaceEntities;
11901 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011902
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011903 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011904 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011905 newDoc = xmlNewDoc(BAD_CAST "1.0");
11906 if (newDoc == NULL) {
11907 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011908 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011909 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011910 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011911 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011912 newDoc->dict = ctxt->dict;
11913 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011914 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011915 } else {
11916 ctxt->myDoc = oldctxt->myDoc;
11917 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011918 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011919 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011920 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11921 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011922 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011923 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011924 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011925 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011926 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011927 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011928 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011929 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011930 ctxt->myDoc->children = NULL;
11931 ctxt->myDoc->last = NULL;
11932 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011933 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011934 ctxt->instate = XML_PARSER_CONTENT;
11935 ctxt->depth = oldctxt->depth + 1;
11936
Daniel Veillard328f48c2002-11-15 15:24:34 +000011937 ctxt->validate = 0;
11938 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011939 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11940 /*
11941 * ID/IDREF registration will be done in xmlValidateElement below
11942 */
11943 ctxt->loadsubset |= XML_SKIP_IDS;
11944 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011945 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011946 ctxt->attsDefault = oldctxt->attsDefault;
11947 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011948
Daniel Veillard68e9e742002-11-16 15:35:11 +000011949 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011950 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011951 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011952 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011953 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011954 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011955 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011956 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011957 }
11958
11959 if (!ctxt->wellFormed) {
11960 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011961 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011962 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011963 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011964 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011965 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011966 }
11967
William M. Brack7b9154b2003-09-27 19:23:50 +000011968 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011969 xmlNodePtr cur;
11970
11971 /*
11972 * Return the newly created nodeset after unlinking it from
11973 * they pseudo parent.
11974 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011975 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011976 *lst = cur;
11977 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011978#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011979 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11980 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11981 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011982 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11983 oldctxt->myDoc, cur);
11984 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011985#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011986 cur->parent = NULL;
11987 cur = cur->next;
11988 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011989 ctxt->myDoc->children->children = NULL;
11990 }
11991 if (ctxt->myDoc != NULL) {
11992 xmlFreeNode(ctxt->myDoc->children);
11993 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011994 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011995 }
11996
11997 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011998 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011999 ctxt->attsDefault = NULL;
12000 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012001 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012002 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012003 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012004 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012005
12006 return(ret);
12007}
12008
Daniel Veillard29b17482004-08-16 00:39:03 +000012009/**
12010 * xmlParseInNodeContext:
12011 * @node: the context node
12012 * @data: the input string
12013 * @datalen: the input string length in bytes
12014 * @options: a combination of xmlParserOption
12015 * @lst: the return value for the set of parsed nodes
12016 *
12017 * Parse a well-balanced chunk of an XML document
12018 * within the context (DTD, namespaces, etc ...) of the given node.
12019 *
12020 * The allowed sequence for the data is a Well Balanced Chunk defined by
12021 * the content production in the XML grammar:
12022 *
12023 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12024 *
12025 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12026 * error code otherwise
12027 */
12028xmlParserErrors
12029xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12030 int options, xmlNodePtr *lst) {
12031#ifdef SAX2
12032 xmlParserCtxtPtr ctxt;
12033 xmlDocPtr doc = NULL;
12034 xmlNodePtr fake, cur;
12035 int nsnr = 0;
12036
12037 xmlParserErrors ret = XML_ERR_OK;
12038
12039 /*
12040 * check all input parameters, grab the document
12041 */
12042 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12043 return(XML_ERR_INTERNAL_ERROR);
12044 switch (node->type) {
12045 case XML_ELEMENT_NODE:
12046 case XML_ATTRIBUTE_NODE:
12047 case XML_TEXT_NODE:
12048 case XML_CDATA_SECTION_NODE:
12049 case XML_ENTITY_REF_NODE:
12050 case XML_PI_NODE:
12051 case XML_COMMENT_NODE:
12052 case XML_DOCUMENT_NODE:
12053 case XML_HTML_DOCUMENT_NODE:
12054 break;
12055 default:
12056 return(XML_ERR_INTERNAL_ERROR);
12057
12058 }
12059 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12060 (node->type != XML_DOCUMENT_NODE) &&
12061 (node->type != XML_HTML_DOCUMENT_NODE))
12062 node = node->parent;
12063 if (node == NULL)
12064 return(XML_ERR_INTERNAL_ERROR);
12065 if (node->type == XML_ELEMENT_NODE)
12066 doc = node->doc;
12067 else
12068 doc = (xmlDocPtr) node;
12069 if (doc == NULL)
12070 return(XML_ERR_INTERNAL_ERROR);
12071
12072 /*
12073 * allocate a context and set-up everything not related to the
12074 * node position in the tree
12075 */
12076 if (doc->type == XML_DOCUMENT_NODE)
12077 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12078#ifdef LIBXML_HTML_ENABLED
12079 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12080 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12081#endif
12082 else
12083 return(XML_ERR_INTERNAL_ERROR);
12084
12085 if (ctxt == NULL)
12086 return(XML_ERR_NO_MEMORY);
12087 fake = xmlNewComment(NULL);
12088 if (fake == NULL) {
12089 xmlFreeParserCtxt(ctxt);
12090 return(XML_ERR_NO_MEMORY);
12091 }
12092 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012093
12094 /*
12095 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12096 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12097 * we must wait until the last moment to free the original one.
12098 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012099 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012100 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012101 xmlDictFree(ctxt->dict);
12102 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012103 } else
12104 options |= XML_PARSE_NODICT;
12105
12106 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000012107 xmlDetectSAX2(ctxt);
12108 ctxt->myDoc = doc;
12109
12110 if (node->type == XML_ELEMENT_NODE) {
12111 nodePush(ctxt, node);
12112 /*
12113 * initialize the SAX2 namespaces stack
12114 */
12115 cur = node;
12116 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12117 xmlNsPtr ns = cur->nsDef;
12118 const xmlChar *iprefix, *ihref;
12119
12120 while (ns != NULL) {
12121 if (ctxt->dict) {
12122 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12123 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12124 } else {
12125 iprefix = ns->prefix;
12126 ihref = ns->href;
12127 }
12128
12129 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12130 nsPush(ctxt, iprefix, ihref);
12131 nsnr++;
12132 }
12133 ns = ns->next;
12134 }
12135 cur = cur->parent;
12136 }
12137 ctxt->instate = XML_PARSER_CONTENT;
12138 }
12139
12140 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12141 /*
12142 * ID/IDREF registration will be done in xmlValidateElement below
12143 */
12144 ctxt->loadsubset |= XML_SKIP_IDS;
12145 }
12146
Daniel Veillard499cc922006-01-18 17:22:35 +000012147#ifdef LIBXML_HTML_ENABLED
12148 if (doc->type == XML_HTML_DOCUMENT_NODE)
12149 __htmlParseContent(ctxt);
12150 else
12151#endif
12152 xmlParseContent(ctxt);
12153
Daniel Veillard29b17482004-08-16 00:39:03 +000012154 nsPop(ctxt, nsnr);
12155 if ((RAW == '<') && (NXT(1) == '/')) {
12156 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12157 } else if (RAW != 0) {
12158 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12159 }
12160 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12161 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12162 ctxt->wellFormed = 0;
12163 }
12164
12165 if (!ctxt->wellFormed) {
12166 if (ctxt->errNo == 0)
12167 ret = XML_ERR_INTERNAL_ERROR;
12168 else
12169 ret = (xmlParserErrors)ctxt->errNo;
12170 } else {
12171 ret = XML_ERR_OK;
12172 }
12173
12174 /*
12175 * Return the newly created nodeset after unlinking it from
12176 * the pseudo sibling.
12177 */
12178
12179 cur = fake->next;
12180 fake->next = NULL;
12181 node->last = fake;
12182
12183 if (cur != NULL) {
12184 cur->prev = NULL;
12185 }
12186
12187 *lst = cur;
12188
12189 while (cur != NULL) {
12190 cur->parent = NULL;
12191 cur = cur->next;
12192 }
12193
12194 xmlUnlinkNode(fake);
12195 xmlFreeNode(fake);
12196
12197
12198 if (ret != XML_ERR_OK) {
12199 xmlFreeNodeList(*lst);
12200 *lst = NULL;
12201 }
William M. Brackc3f81342004-10-03 01:22:44 +000012202
William M. Brackb7b54de2004-10-06 16:38:01 +000012203 if (doc->dict != NULL)
12204 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012205 xmlFreeParserCtxt(ctxt);
12206
12207 return(ret);
12208#else /* !SAX2 */
12209 return(XML_ERR_INTERNAL_ERROR);
12210#endif
12211}
12212
Daniel Veillard81273902003-09-30 00:43:48 +000012213#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012214/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012215 * xmlParseBalancedChunkMemoryRecover:
12216 * @doc: the document the chunk pertains to
12217 * @sax: the SAX handler bloc (possibly NULL)
12218 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12219 * @depth: Used for loop detection, use 0
12220 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12221 * @lst: the return value for the set of parsed nodes
12222 * @recover: return nodes even if the data is broken (use 0)
12223 *
12224 *
12225 * Parse a well-balanced chunk of an XML document
12226 * called by the parser
12227 * The allowed sequence for the Well Balanced Chunk is the one defined by
12228 * the content production in the XML grammar:
12229 *
12230 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12231 *
12232 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12233 * the parser error code otherwise
12234 *
12235 * In case recover is set to 1, the nodelist will not be empty even if
12236 * the parsed chunk is not well balanced.
12237 */
12238int
12239xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12240 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12241 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012242 xmlParserCtxtPtr ctxt;
12243 xmlDocPtr newDoc;
12244 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012245 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012246 int size;
12247 int ret = 0;
12248
12249 if (depth > 40) {
12250 return(XML_ERR_ENTITY_LOOP);
12251 }
12252
12253
Daniel Veillardcda96922001-08-21 10:56:31 +000012254 if (lst != NULL)
12255 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012256 if (string == NULL)
12257 return(-1);
12258
12259 size = xmlStrlen(string);
12260
12261 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12262 if (ctxt == NULL) return(-1);
12263 ctxt->userData = ctxt;
12264 if (sax != NULL) {
12265 oldsax = ctxt->sax;
12266 ctxt->sax = sax;
12267 if (user_data != NULL)
12268 ctxt->userData = user_data;
12269 }
12270 newDoc = xmlNewDoc(BAD_CAST "1.0");
12271 if (newDoc == NULL) {
12272 xmlFreeParserCtxt(ctxt);
12273 return(-1);
12274 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012275 if ((doc != NULL) && (doc->dict != NULL)) {
12276 xmlDictFree(ctxt->dict);
12277 ctxt->dict = doc->dict;
12278 xmlDictReference(ctxt->dict);
12279 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12280 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12281 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12282 ctxt->dictNames = 1;
12283 } else {
12284 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12285 }
Owen Taylor3473f882001-02-23 17:55:21 +000012286 if (doc != NULL) {
12287 newDoc->intSubset = doc->intSubset;
12288 newDoc->extSubset = doc->extSubset;
12289 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012290 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12291 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012292 if (sax != NULL)
12293 ctxt->sax = oldsax;
12294 xmlFreeParserCtxt(ctxt);
12295 newDoc->intSubset = NULL;
12296 newDoc->extSubset = NULL;
12297 xmlFreeDoc(newDoc);
12298 return(-1);
12299 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012300 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12301 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012302 if (doc == NULL) {
12303 ctxt->myDoc = newDoc;
12304 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012305 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012306 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012307 /* Ensure that doc has XML spec namespace */
12308 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12309 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012310 }
12311 ctxt->instate = XML_PARSER_CONTENT;
12312 ctxt->depth = depth;
12313
12314 /*
12315 * Doing validity checking on chunk doesn't make sense
12316 */
12317 ctxt->validate = 0;
12318 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012319 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012320
Daniel Veillardb39bc392002-10-26 19:29:51 +000012321 if ( doc != NULL ){
12322 content = doc->children;
12323 doc->children = NULL;
12324 xmlParseContent(ctxt);
12325 doc->children = content;
12326 }
12327 else {
12328 xmlParseContent(ctxt);
12329 }
Owen Taylor3473f882001-02-23 17:55:21 +000012330 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012331 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012332 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012333 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012334 }
12335 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012336 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012337 }
12338
12339 if (!ctxt->wellFormed) {
12340 if (ctxt->errNo == 0)
12341 ret = 1;
12342 else
12343 ret = ctxt->errNo;
12344 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012345 ret = 0;
12346 }
12347
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012348 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12349 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012350
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012351 /*
12352 * Return the newly created nodeset after unlinking it from
12353 * they pseudo parent.
12354 */
12355 cur = newDoc->children->children;
12356 *lst = cur;
12357 while (cur != NULL) {
12358 xmlSetTreeDoc(cur, doc);
12359 cur->parent = NULL;
12360 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012361 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012362 newDoc->children->children = NULL;
12363 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012364
Owen Taylor3473f882001-02-23 17:55:21 +000012365 if (sax != NULL)
12366 ctxt->sax = oldsax;
12367 xmlFreeParserCtxt(ctxt);
12368 newDoc->intSubset = NULL;
12369 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012370 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012371 xmlFreeDoc(newDoc);
12372
12373 return(ret);
12374}
12375
12376/**
12377 * xmlSAXParseEntity:
12378 * @sax: the SAX handler block
12379 * @filename: the filename
12380 *
12381 * parse an XML external entity out of context and build a tree.
12382 * It use the given SAX function block to handle the parsing callback.
12383 * If sax is NULL, fallback to the default DOM tree building routines.
12384 *
12385 * [78] extParsedEnt ::= TextDecl? content
12386 *
12387 * This correspond to a "Well Balanced" chunk
12388 *
12389 * Returns the resulting document tree
12390 */
12391
12392xmlDocPtr
12393xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12394 xmlDocPtr ret;
12395 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012396
12397 ctxt = xmlCreateFileParserCtxt(filename);
12398 if (ctxt == NULL) {
12399 return(NULL);
12400 }
12401 if (sax != NULL) {
12402 if (ctxt->sax != NULL)
12403 xmlFree(ctxt->sax);
12404 ctxt->sax = sax;
12405 ctxt->userData = NULL;
12406 }
12407
Owen Taylor3473f882001-02-23 17:55:21 +000012408 xmlParseExtParsedEnt(ctxt);
12409
12410 if (ctxt->wellFormed)
12411 ret = ctxt->myDoc;
12412 else {
12413 ret = NULL;
12414 xmlFreeDoc(ctxt->myDoc);
12415 ctxt->myDoc = NULL;
12416 }
12417 if (sax != NULL)
12418 ctxt->sax = NULL;
12419 xmlFreeParserCtxt(ctxt);
12420
12421 return(ret);
12422}
12423
12424/**
12425 * xmlParseEntity:
12426 * @filename: the filename
12427 *
12428 * parse an XML external entity out of context and build a tree.
12429 *
12430 * [78] extParsedEnt ::= TextDecl? content
12431 *
12432 * This correspond to a "Well Balanced" chunk
12433 *
12434 * Returns the resulting document tree
12435 */
12436
12437xmlDocPtr
12438xmlParseEntity(const char *filename) {
12439 return(xmlSAXParseEntity(NULL, filename));
12440}
Daniel Veillard81273902003-09-30 00:43:48 +000012441#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012442
12443/**
12444 * xmlCreateEntityParserCtxt:
12445 * @URL: the entity URL
12446 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012447 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012448 *
12449 * Create a parser context for an external entity
12450 * Automatic support for ZLIB/Compress compressed document is provided
12451 * by default if found at compile-time.
12452 *
12453 * Returns the new parser context or NULL
12454 */
12455xmlParserCtxtPtr
12456xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12457 const xmlChar *base) {
12458 xmlParserCtxtPtr ctxt;
12459 xmlParserInputPtr inputStream;
12460 char *directory = NULL;
12461 xmlChar *uri;
12462
12463 ctxt = xmlNewParserCtxt();
12464 if (ctxt == NULL) {
12465 return(NULL);
12466 }
12467
12468 uri = xmlBuildURI(URL, base);
12469
12470 if (uri == NULL) {
12471 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12472 if (inputStream == NULL) {
12473 xmlFreeParserCtxt(ctxt);
12474 return(NULL);
12475 }
12476
12477 inputPush(ctxt, inputStream);
12478
12479 if ((ctxt->directory == NULL) && (directory == NULL))
12480 directory = xmlParserGetDirectory((char *)URL);
12481 if ((ctxt->directory == NULL) && (directory != NULL))
12482 ctxt->directory = directory;
12483 } else {
12484 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12485 if (inputStream == NULL) {
12486 xmlFree(uri);
12487 xmlFreeParserCtxt(ctxt);
12488 return(NULL);
12489 }
12490
12491 inputPush(ctxt, inputStream);
12492
12493 if ((ctxt->directory == NULL) && (directory == NULL))
12494 directory = xmlParserGetDirectory((char *)uri);
12495 if ((ctxt->directory == NULL) && (directory != NULL))
12496 ctxt->directory = directory;
12497 xmlFree(uri);
12498 }
Owen Taylor3473f882001-02-23 17:55:21 +000012499 return(ctxt);
12500}
12501
12502/************************************************************************
12503 * *
12504 * Front ends when parsing from a file *
12505 * *
12506 ************************************************************************/
12507
12508/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012509 * xmlCreateURLParserCtxt:
12510 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012511 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012512 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012513 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012514 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012515 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012516 *
12517 * Returns the new parser context or NULL
12518 */
12519xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012520xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012521{
12522 xmlParserCtxtPtr ctxt;
12523 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012524 char *directory = NULL;
12525
Owen Taylor3473f882001-02-23 17:55:21 +000012526 ctxt = xmlNewParserCtxt();
12527 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012528 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012529 return(NULL);
12530 }
12531
Daniel Veillarddf292f72005-01-16 19:00:15 +000012532 if (options)
12533 xmlCtxtUseOptions(ctxt, options);
12534 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012535
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012536 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012537 if (inputStream == NULL) {
12538 xmlFreeParserCtxt(ctxt);
12539 return(NULL);
12540 }
12541
Owen Taylor3473f882001-02-23 17:55:21 +000012542 inputPush(ctxt, inputStream);
12543 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012544 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012545 if ((ctxt->directory == NULL) && (directory != NULL))
12546 ctxt->directory = directory;
12547
12548 return(ctxt);
12549}
12550
Daniel Veillard61b93382003-11-03 14:28:31 +000012551/**
12552 * xmlCreateFileParserCtxt:
12553 * @filename: the filename
12554 *
12555 * Create a parser context for a file content.
12556 * Automatic support for ZLIB/Compress compressed document is provided
12557 * by default if found at compile-time.
12558 *
12559 * Returns the new parser context or NULL
12560 */
12561xmlParserCtxtPtr
12562xmlCreateFileParserCtxt(const char *filename)
12563{
12564 return(xmlCreateURLParserCtxt(filename, 0));
12565}
12566
Daniel Veillard81273902003-09-30 00:43:48 +000012567#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012568/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012569 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012570 * @sax: the SAX handler block
12571 * @filename: the filename
12572 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12573 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012574 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012575 *
12576 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12577 * compressed document is provided by default if found at compile-time.
12578 * It use the given SAX function block to handle the parsing callback.
12579 * If sax is NULL, fallback to the default DOM tree building routines.
12580 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012581 * User data (void *) is stored within the parser context in the
12582 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012583 *
Owen Taylor3473f882001-02-23 17:55:21 +000012584 * Returns the resulting document tree
12585 */
12586
12587xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012588xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12589 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012590 xmlDocPtr ret;
12591 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012592
Daniel Veillard635ef722001-10-29 11:48:19 +000012593 xmlInitParser();
12594
Owen Taylor3473f882001-02-23 17:55:21 +000012595 ctxt = xmlCreateFileParserCtxt(filename);
12596 if (ctxt == NULL) {
12597 return(NULL);
12598 }
12599 if (sax != NULL) {
12600 if (ctxt->sax != NULL)
12601 xmlFree(ctxt->sax);
12602 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012603 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012604 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012605 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012606 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012607 }
Owen Taylor3473f882001-02-23 17:55:21 +000012608
Daniel Veillard37d2d162008-03-14 10:54:00 +000012609 if (ctxt->directory == NULL)
12610 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012611
Daniel Veillarddad3f682002-11-17 16:47:27 +000012612 ctxt->recovery = recovery;
12613
Owen Taylor3473f882001-02-23 17:55:21 +000012614 xmlParseDocument(ctxt);
12615
William M. Brackc07329e2003-09-08 01:57:30 +000012616 if ((ctxt->wellFormed) || recovery) {
12617 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012618 if (ret != NULL) {
12619 if (ctxt->input->buf->compressed > 0)
12620 ret->compression = 9;
12621 else
12622 ret->compression = ctxt->input->buf->compressed;
12623 }
William M. Brackc07329e2003-09-08 01:57:30 +000012624 }
Owen Taylor3473f882001-02-23 17:55:21 +000012625 else {
12626 ret = NULL;
12627 xmlFreeDoc(ctxt->myDoc);
12628 ctxt->myDoc = NULL;
12629 }
12630 if (sax != NULL)
12631 ctxt->sax = NULL;
12632 xmlFreeParserCtxt(ctxt);
12633
12634 return(ret);
12635}
12636
12637/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012638 * xmlSAXParseFile:
12639 * @sax: the SAX handler block
12640 * @filename: the filename
12641 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12642 * documents
12643 *
12644 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12645 * compressed document is provided by default if found at compile-time.
12646 * It use the given SAX function block to handle the parsing callback.
12647 * If sax is NULL, fallback to the default DOM tree building routines.
12648 *
12649 * Returns the resulting document tree
12650 */
12651
12652xmlDocPtr
12653xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12654 int recovery) {
12655 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12656}
12657
12658/**
Owen Taylor3473f882001-02-23 17:55:21 +000012659 * xmlRecoverDoc:
12660 * @cur: a pointer to an array of xmlChar
12661 *
12662 * parse an XML in-memory document and build a tree.
12663 * In the case the document is not Well Formed, a tree is built anyway
12664 *
12665 * Returns the resulting document tree
12666 */
12667
12668xmlDocPtr
12669xmlRecoverDoc(xmlChar *cur) {
12670 return(xmlSAXParseDoc(NULL, cur, 1));
12671}
12672
12673/**
12674 * xmlParseFile:
12675 * @filename: the filename
12676 *
12677 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12678 * compressed document is provided by default if found at compile-time.
12679 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012680 * Returns the resulting document tree if the file was wellformed,
12681 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012682 */
12683
12684xmlDocPtr
12685xmlParseFile(const char *filename) {
12686 return(xmlSAXParseFile(NULL, filename, 0));
12687}
12688
12689/**
12690 * xmlRecoverFile:
12691 * @filename: the filename
12692 *
12693 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12694 * compressed document is provided by default if found at compile-time.
12695 * In the case the document is not Well Formed, a tree is built anyway
12696 *
12697 * Returns the resulting document tree
12698 */
12699
12700xmlDocPtr
12701xmlRecoverFile(const char *filename) {
12702 return(xmlSAXParseFile(NULL, filename, 1));
12703}
12704
12705
12706/**
12707 * xmlSetupParserForBuffer:
12708 * @ctxt: an XML parser context
12709 * @buffer: a xmlChar * buffer
12710 * @filename: a file name
12711 *
12712 * Setup the parser context to parse a new buffer; Clears any prior
12713 * contents from the parser context. The buffer parameter must not be
12714 * NULL, but the filename parameter can be
12715 */
12716void
12717xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12718 const char* filename)
12719{
12720 xmlParserInputPtr input;
12721
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012722 if ((ctxt == NULL) || (buffer == NULL))
12723 return;
12724
Owen Taylor3473f882001-02-23 17:55:21 +000012725 input = xmlNewInputStream(ctxt);
12726 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012727 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012728 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012729 return;
12730 }
12731
12732 xmlClearParserCtxt(ctxt);
12733 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012734 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012735 input->base = buffer;
12736 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012737 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012738 inputPush(ctxt, input);
12739}
12740
12741/**
12742 * xmlSAXUserParseFile:
12743 * @sax: a SAX handler
12744 * @user_data: The user data returned on SAX callbacks
12745 * @filename: a file name
12746 *
12747 * parse an XML file and call the given SAX handler routines.
12748 * Automatic support for ZLIB/Compress compressed document is provided
12749 *
12750 * Returns 0 in case of success or a error number otherwise
12751 */
12752int
12753xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12754 const char *filename) {
12755 int ret = 0;
12756 xmlParserCtxtPtr ctxt;
12757
12758 ctxt = xmlCreateFileParserCtxt(filename);
12759 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012760 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012761 xmlFree(ctxt->sax);
12762 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012763 xmlDetectSAX2(ctxt);
12764
Owen Taylor3473f882001-02-23 17:55:21 +000012765 if (user_data != NULL)
12766 ctxt->userData = user_data;
12767
12768 xmlParseDocument(ctxt);
12769
12770 if (ctxt->wellFormed)
12771 ret = 0;
12772 else {
12773 if (ctxt->errNo != 0)
12774 ret = ctxt->errNo;
12775 else
12776 ret = -1;
12777 }
12778 if (sax != NULL)
12779 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012780 if (ctxt->myDoc != NULL) {
12781 xmlFreeDoc(ctxt->myDoc);
12782 ctxt->myDoc = NULL;
12783 }
Owen Taylor3473f882001-02-23 17:55:21 +000012784 xmlFreeParserCtxt(ctxt);
12785
12786 return ret;
12787}
Daniel Veillard81273902003-09-30 00:43:48 +000012788#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012789
12790/************************************************************************
12791 * *
12792 * Front ends when parsing from memory *
12793 * *
12794 ************************************************************************/
12795
12796/**
12797 * xmlCreateMemoryParserCtxt:
12798 * @buffer: a pointer to a char array
12799 * @size: the size of the array
12800 *
12801 * Create a parser context for an XML in-memory document.
12802 *
12803 * Returns the new parser context or NULL
12804 */
12805xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012806xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012807 xmlParserCtxtPtr ctxt;
12808 xmlParserInputPtr input;
12809 xmlParserInputBufferPtr buf;
12810
12811 if (buffer == NULL)
12812 return(NULL);
12813 if (size <= 0)
12814 return(NULL);
12815
12816 ctxt = xmlNewParserCtxt();
12817 if (ctxt == NULL)
12818 return(NULL);
12819
Daniel Veillard53350552003-09-18 13:35:51 +000012820 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012821 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012822 if (buf == NULL) {
12823 xmlFreeParserCtxt(ctxt);
12824 return(NULL);
12825 }
Owen Taylor3473f882001-02-23 17:55:21 +000012826
12827 input = xmlNewInputStream(ctxt);
12828 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012829 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012830 xmlFreeParserCtxt(ctxt);
12831 return(NULL);
12832 }
12833
12834 input->filename = NULL;
12835 input->buf = buf;
12836 input->base = input->buf->buffer->content;
12837 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012838 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012839
12840 inputPush(ctxt, input);
12841 return(ctxt);
12842}
12843
Daniel Veillard81273902003-09-30 00:43:48 +000012844#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012845/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012846 * xmlSAXParseMemoryWithData:
12847 * @sax: the SAX handler block
12848 * @buffer: an pointer to a char array
12849 * @size: the size of the array
12850 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12851 * documents
12852 * @data: the userdata
12853 *
12854 * parse an XML in-memory block and use the given SAX function block
12855 * to handle the parsing callback. If sax is NULL, fallback to the default
12856 * DOM tree building routines.
12857 *
12858 * User data (void *) is stored within the parser context in the
12859 * context's _private member, so it is available nearly everywhere in libxml
12860 *
12861 * Returns the resulting document tree
12862 */
12863
12864xmlDocPtr
12865xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12866 int size, int recovery, void *data) {
12867 xmlDocPtr ret;
12868 xmlParserCtxtPtr ctxt;
12869
12870 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12871 if (ctxt == NULL) return(NULL);
12872 if (sax != NULL) {
12873 if (ctxt->sax != NULL)
12874 xmlFree(ctxt->sax);
12875 ctxt->sax = sax;
12876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012877 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012878 if (data!=NULL) {
12879 ctxt->_private=data;
12880 }
12881
Daniel Veillardadba5f12003-04-04 16:09:01 +000012882 ctxt->recovery = recovery;
12883
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012884 xmlParseDocument(ctxt);
12885
12886 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12887 else {
12888 ret = NULL;
12889 xmlFreeDoc(ctxt->myDoc);
12890 ctxt->myDoc = NULL;
12891 }
12892 if (sax != NULL)
12893 ctxt->sax = NULL;
12894 xmlFreeParserCtxt(ctxt);
12895
12896 return(ret);
12897}
12898
12899/**
Owen Taylor3473f882001-02-23 17:55:21 +000012900 * xmlSAXParseMemory:
12901 * @sax: the SAX handler block
12902 * @buffer: an pointer to a char array
12903 * @size: the size of the array
12904 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12905 * documents
12906 *
12907 * parse an XML in-memory block and use the given SAX function block
12908 * to handle the parsing callback. If sax is NULL, fallback to the default
12909 * DOM tree building routines.
12910 *
12911 * Returns the resulting document tree
12912 */
12913xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012914xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12915 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012916 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012917}
12918
12919/**
12920 * xmlParseMemory:
12921 * @buffer: an pointer to a char array
12922 * @size: the size of the array
12923 *
12924 * parse an XML in-memory block and build a tree.
12925 *
12926 * Returns the resulting document tree
12927 */
12928
Daniel Veillard50822cb2001-07-26 20:05:51 +000012929xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012930 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12931}
12932
12933/**
12934 * xmlRecoverMemory:
12935 * @buffer: an pointer to a char array
12936 * @size: the size of the array
12937 *
12938 * parse an XML in-memory block and build a tree.
12939 * In the case the document is not Well Formed, a tree is built anyway
12940 *
12941 * Returns the resulting document tree
12942 */
12943
Daniel Veillard50822cb2001-07-26 20:05:51 +000012944xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012945 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12946}
12947
12948/**
12949 * xmlSAXUserParseMemory:
12950 * @sax: a SAX handler
12951 * @user_data: The user data returned on SAX callbacks
12952 * @buffer: an in-memory XML document input
12953 * @size: the length of the XML document in bytes
12954 *
12955 * A better SAX parsing routine.
12956 * parse an XML in-memory buffer and call the given SAX handler routines.
12957 *
12958 * Returns 0 in case of success or a error number otherwise
12959 */
12960int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012961 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012962 int ret = 0;
12963 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012964
12965 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12966 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012967 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12968 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000012969 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012970 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012971
Daniel Veillard30211a02001-04-26 09:33:18 +000012972 if (user_data != NULL)
12973 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012974
12975 xmlParseDocument(ctxt);
12976
12977 if (ctxt->wellFormed)
12978 ret = 0;
12979 else {
12980 if (ctxt->errNo != 0)
12981 ret = ctxt->errNo;
12982 else
12983 ret = -1;
12984 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012985 if (sax != NULL)
12986 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012987 if (ctxt->myDoc != NULL) {
12988 xmlFreeDoc(ctxt->myDoc);
12989 ctxt->myDoc = NULL;
12990 }
Owen Taylor3473f882001-02-23 17:55:21 +000012991 xmlFreeParserCtxt(ctxt);
12992
12993 return ret;
12994}
Daniel Veillard81273902003-09-30 00:43:48 +000012995#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012996
12997/**
12998 * xmlCreateDocParserCtxt:
12999 * @cur: a pointer to an array of xmlChar
13000 *
13001 * Creates a parser context for an XML in-memory document.
13002 *
13003 * Returns the new parser context or NULL
13004 */
13005xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013006xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013007 int len;
13008
13009 if (cur == NULL)
13010 return(NULL);
13011 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013012 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013013}
13014
Daniel Veillard81273902003-09-30 00:43:48 +000013015#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013016/**
13017 * xmlSAXParseDoc:
13018 * @sax: the SAX handler block
13019 * @cur: a pointer to an array of xmlChar
13020 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13021 * documents
13022 *
13023 * parse an XML in-memory document and build a tree.
13024 * It use the given SAX function block to handle the parsing callback.
13025 * If sax is NULL, fallback to the default DOM tree building routines.
13026 *
13027 * Returns the resulting document tree
13028 */
13029
13030xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013031xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013032 xmlDocPtr ret;
13033 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013034 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013035
Daniel Veillard38936062004-11-04 17:45:11 +000013036 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013037
13038
13039 ctxt = xmlCreateDocParserCtxt(cur);
13040 if (ctxt == NULL) return(NULL);
13041 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013042 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013043 ctxt->sax = sax;
13044 ctxt->userData = NULL;
13045 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013046 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013047
13048 xmlParseDocument(ctxt);
13049 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13050 else {
13051 ret = NULL;
13052 xmlFreeDoc(ctxt->myDoc);
13053 ctxt->myDoc = NULL;
13054 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013055 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013056 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013057 xmlFreeParserCtxt(ctxt);
13058
13059 return(ret);
13060}
13061
13062/**
13063 * xmlParseDoc:
13064 * @cur: a pointer to an array of xmlChar
13065 *
13066 * parse an XML in-memory document and build a tree.
13067 *
13068 * Returns the resulting document tree
13069 */
13070
13071xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013072xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013073 return(xmlSAXParseDoc(NULL, cur, 0));
13074}
Daniel Veillard81273902003-09-30 00:43:48 +000013075#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013076
Daniel Veillard81273902003-09-30 00:43:48 +000013077#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013078/************************************************************************
13079 * *
13080 * Specific function to keep track of entities references *
13081 * and used by the XSLT debugger *
13082 * *
13083 ************************************************************************/
13084
13085static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13086
13087/**
13088 * xmlAddEntityReference:
13089 * @ent : A valid entity
13090 * @firstNode : A valid first node for children of entity
13091 * @lastNode : A valid last node of children entity
13092 *
13093 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13094 */
13095static void
13096xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13097 xmlNodePtr lastNode)
13098{
13099 if (xmlEntityRefFunc != NULL) {
13100 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13101 }
13102}
13103
13104
13105/**
13106 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013107 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013108 *
13109 * Set the function to call call back when a xml reference has been made
13110 */
13111void
13112xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13113{
13114 xmlEntityRefFunc = func;
13115}
Daniel Veillard81273902003-09-30 00:43:48 +000013116#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013117
13118/************************************************************************
13119 * *
13120 * Miscellaneous *
13121 * *
13122 ************************************************************************/
13123
13124#ifdef LIBXML_XPATH_ENABLED
13125#include <libxml/xpath.h>
13126#endif
13127
Daniel Veillardffa3c742005-07-21 13:24:09 +000013128extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013129static int xmlParserInitialized = 0;
13130
13131/**
13132 * xmlInitParser:
13133 *
13134 * Initialization function for the XML parser.
13135 * This is not reentrant. Call once before processing in case of
13136 * use in multithreaded programs.
13137 */
13138
13139void
13140xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013141 if (xmlParserInitialized != 0)
13142 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013143
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013144#ifdef LIBXML_THREAD_ENABLED
13145 __xmlGlobalInitMutexLock();
13146 if (xmlParserInitialized == 0) {
13147#endif
13148 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13149 (xmlGenericError == NULL))
13150 initGenericErrorDefaultFunc(NULL);
13151 xmlInitGlobals();
13152 xmlInitThreads();
13153 xmlInitMemory();
13154 xmlInitCharEncodingHandlers();
13155 xmlDefaultSAXHandlerInit();
13156 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013157#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013158 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013159#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013160#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013161 htmlInitAutoClose();
13162 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013163#endif
13164#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013165 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013166#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013167 xmlParserInitialized = 1;
13168#ifdef LIBXML_THREAD_ENABLED
13169 }
13170 __xmlGlobalInitMutexUnlock();
13171#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013172}
13173
13174/**
13175 * xmlCleanupParser:
13176 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000013177 * Cleanup function for the XML library. It tries to reclaim all
13178 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000013179 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000013180 * function should not prevent reusing the library but one should
13181 * call xmlCleanupParser() only when the process has
Daniel Veillardccc476f2008-03-04 13:19:49 +000013182 * finished using the library and all XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000013183 */
13184
13185void
13186xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013187 if (!xmlParserInitialized)
13188 return;
13189
Owen Taylor3473f882001-02-23 17:55:21 +000013190 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013191#ifdef LIBXML_CATALOG_ENABLED
13192 xmlCatalogCleanup();
13193#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013194 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013195 xmlCleanupInputCallbacks();
13196#ifdef LIBXML_OUTPUT_ENABLED
13197 xmlCleanupOutputCallbacks();
13198#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013199#ifdef LIBXML_SCHEMAS_ENABLED
13200 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013201 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013202#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013203 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013204 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013205 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013206 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013207 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013208}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013209
13210/************************************************************************
13211 * *
13212 * New set (2.6.0) of simpler and more flexible APIs *
13213 * *
13214 ************************************************************************/
13215
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL dict means the string is always freed. A NULL
 * @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: it must be followed by a semicolon and can be
 * used safely as the body of an if/else. Note that @str is evaluated
 * more than once.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
13227
13228/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013229 * xmlCtxtReset:
13230 * @ctxt: an XML parser context
13231 *
13232 * Reset a parser context
13233 */
13234void
13235xmlCtxtReset(xmlParserCtxtPtr ctxt)
13236{
13237 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013238 xmlDictPtr dict;
13239
13240 if (ctxt == NULL)
13241 return;
13242
13243 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013244
13245 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13246 xmlFreeInputStream(input);
13247 }
13248 ctxt->inputNr = 0;
13249 ctxt->input = NULL;
13250
13251 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013252 if (ctxt->spaceTab != NULL) {
13253 ctxt->spaceTab[0] = -1;
13254 ctxt->space = &ctxt->spaceTab[0];
13255 } else {
13256 ctxt->space = NULL;
13257 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013258
13259
13260 ctxt->nodeNr = 0;
13261 ctxt->node = NULL;
13262
13263 ctxt->nameNr = 0;
13264 ctxt->name = NULL;
13265
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013266 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013267 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013268 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013269 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013270 DICT_FREE(ctxt->directory);
13271 ctxt->directory = NULL;
13272 DICT_FREE(ctxt->extSubURI);
13273 ctxt->extSubURI = NULL;
13274 DICT_FREE(ctxt->extSubSystem);
13275 ctxt->extSubSystem = NULL;
13276 if (ctxt->myDoc != NULL)
13277 xmlFreeDoc(ctxt->myDoc);
13278 ctxt->myDoc = NULL;
13279
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280 ctxt->standalone = -1;
13281 ctxt->hasExternalSubset = 0;
13282 ctxt->hasPErefs = 0;
13283 ctxt->html = 0;
13284 ctxt->external = 0;
13285 ctxt->instate = XML_PARSER_START;
13286 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013287
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013288 ctxt->wellFormed = 1;
13289 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013290 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013291 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013292#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013293 ctxt->vctxt.userData = ctxt;
13294 ctxt->vctxt.error = xmlParserValidityError;
13295 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013296#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013297 ctxt->record_info = 0;
13298 ctxt->nbChars = 0;
13299 ctxt->checkIndex = 0;
13300 ctxt->inSubset = 0;
13301 ctxt->errNo = XML_ERR_OK;
13302 ctxt->depth = 0;
13303 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13304 ctxt->catalogs = NULL;
13305 xmlInitNodeInfoSeq(&ctxt->node_seq);
13306
13307 if (ctxt->attsDefault != NULL) {
13308 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13309 ctxt->attsDefault = NULL;
13310 }
13311 if (ctxt->attsSpecial != NULL) {
13312 xmlHashFree(ctxt->attsSpecial, NULL);
13313 ctxt->attsSpecial = NULL;
13314 }
13315
Daniel Veillard4432df22003-09-28 18:58:27 +000013316#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013317 if (ctxt->catalogs != NULL)
13318 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013319#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013320 if (ctxt->lastError.code != XML_ERR_OK)
13321 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013322}
13323
13324/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013325 * xmlCtxtResetPush:
13326 * @ctxt: an XML parser context
13327 * @chunk: a pointer to an array of chars
13328 * @size: number of chars in the array
13329 * @filename: an optional file name or URI
13330 * @encoding: the document encoding, or NULL
13331 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013332 * Reset a push parser context
13333 *
13334 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013335 */
13336int
13337xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13338 int size, const char *filename, const char *encoding)
13339{
13340 xmlParserInputPtr inputStream;
13341 xmlParserInputBufferPtr buf;
13342 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13343
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013344 if (ctxt == NULL)
13345 return(1);
13346
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013347 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13348 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13349
13350 buf = xmlAllocParserInputBuffer(enc);
13351 if (buf == NULL)
13352 return(1);
13353
13354 if (ctxt == NULL) {
13355 xmlFreeParserInputBuffer(buf);
13356 return(1);
13357 }
13358
13359 xmlCtxtReset(ctxt);
13360
13361 if (ctxt->pushTab == NULL) {
13362 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13363 sizeof(xmlChar *));
13364 if (ctxt->pushTab == NULL) {
13365 xmlErrMemory(ctxt, NULL);
13366 xmlFreeParserInputBuffer(buf);
13367 return(1);
13368 }
13369 }
13370
13371 if (filename == NULL) {
13372 ctxt->directory = NULL;
13373 } else {
13374 ctxt->directory = xmlParserGetDirectory(filename);
13375 }
13376
13377 inputStream = xmlNewInputStream(ctxt);
13378 if (inputStream == NULL) {
13379 xmlFreeParserInputBuffer(buf);
13380 return(1);
13381 }
13382
13383 if (filename == NULL)
13384 inputStream->filename = NULL;
13385 else
13386 inputStream->filename = (char *)
13387 xmlCanonicPath((const xmlChar *) filename);
13388 inputStream->buf = buf;
13389 inputStream->base = inputStream->buf->buffer->content;
13390 inputStream->cur = inputStream->buf->buffer->content;
13391 inputStream->end =
13392 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13393
13394 inputPush(ctxt, inputStream);
13395
13396 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13397 (ctxt->input->buf != NULL)) {
13398 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13399 int cur = ctxt->input->cur - ctxt->input->base;
13400
13401 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13402
13403 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13404 ctxt->input->cur = ctxt->input->base + cur;
13405 ctxt->input->end =
13406 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13407 use];
13408#ifdef DEBUG_PUSH
13409 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13410#endif
13411 }
13412
13413 if (encoding != NULL) {
13414 xmlCharEncodingHandlerPtr hdlr;
13415
13416 hdlr = xmlFindCharEncodingHandler(encoding);
13417 if (hdlr != NULL) {
13418 xmlSwitchToEncoding(ctxt, hdlr);
13419 } else {
13420 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13421 "Unsupported encoding %s\n", BAD_CAST encoding);
13422 }
13423 } else if (enc != XML_CHAR_ENCODING_NONE) {
13424 xmlSwitchEncoding(ctxt, enc);
13425 }
13426
13427 return(0);
13428}
13429
13430/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013431 * xmlCtxtUseOptions:
13432 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013433 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013434 *
13435 * Applies the options to the parser context
13436 *
13437 * Returns 0 in case of success, the set of unknown or unimplemented options
13438 * in case of error.
13439 */
13440int
13441xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13442{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013443 if (ctxt == NULL)
13444 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013445 if (options & XML_PARSE_RECOVER) {
13446 ctxt->recovery = 1;
13447 options -= XML_PARSE_RECOVER;
13448 } else
13449 ctxt->recovery = 0;
13450 if (options & XML_PARSE_DTDLOAD) {
13451 ctxt->loadsubset = XML_DETECT_IDS;
13452 options -= XML_PARSE_DTDLOAD;
13453 } else
13454 ctxt->loadsubset = 0;
13455 if (options & XML_PARSE_DTDATTR) {
13456 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13457 options -= XML_PARSE_DTDATTR;
13458 }
13459 if (options & XML_PARSE_NOENT) {
13460 ctxt->replaceEntities = 1;
13461 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13462 options -= XML_PARSE_NOENT;
13463 } else
13464 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013465 if (options & XML_PARSE_PEDANTIC) {
13466 ctxt->pedantic = 1;
13467 options -= XML_PARSE_PEDANTIC;
13468 } else
13469 ctxt->pedantic = 0;
13470 if (options & XML_PARSE_NOBLANKS) {
13471 ctxt->keepBlanks = 0;
13472 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13473 options -= XML_PARSE_NOBLANKS;
13474 } else
13475 ctxt->keepBlanks = 1;
13476 if (options & XML_PARSE_DTDVALID) {
13477 ctxt->validate = 1;
13478 if (options & XML_PARSE_NOWARNING)
13479 ctxt->vctxt.warning = NULL;
13480 if (options & XML_PARSE_NOERROR)
13481 ctxt->vctxt.error = NULL;
13482 options -= XML_PARSE_DTDVALID;
13483 } else
13484 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013485 if (options & XML_PARSE_NOWARNING) {
13486 ctxt->sax->warning = NULL;
13487 options -= XML_PARSE_NOWARNING;
13488 }
13489 if (options & XML_PARSE_NOERROR) {
13490 ctxt->sax->error = NULL;
13491 ctxt->sax->fatalError = NULL;
13492 options -= XML_PARSE_NOERROR;
13493 }
Daniel Veillard81273902003-09-30 00:43:48 +000013494#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013495 if (options & XML_PARSE_SAX1) {
13496 ctxt->sax->startElement = xmlSAX2StartElement;
13497 ctxt->sax->endElement = xmlSAX2EndElement;
13498 ctxt->sax->startElementNs = NULL;
13499 ctxt->sax->endElementNs = NULL;
13500 ctxt->sax->initialized = 1;
13501 options -= XML_PARSE_SAX1;
13502 }
Daniel Veillard81273902003-09-30 00:43:48 +000013503#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013504 if (options & XML_PARSE_NODICT) {
13505 ctxt->dictNames = 0;
13506 options -= XML_PARSE_NODICT;
13507 } else {
13508 ctxt->dictNames = 1;
13509 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013510 if (options & XML_PARSE_NOCDATA) {
13511 ctxt->sax->cdataBlock = NULL;
13512 options -= XML_PARSE_NOCDATA;
13513 }
13514 if (options & XML_PARSE_NSCLEAN) {
13515 ctxt->options |= XML_PARSE_NSCLEAN;
13516 options -= XML_PARSE_NSCLEAN;
13517 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013518 if (options & XML_PARSE_NONET) {
13519 ctxt->options |= XML_PARSE_NONET;
13520 options -= XML_PARSE_NONET;
13521 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013522 if (options & XML_PARSE_COMPACT) {
13523 ctxt->options |= XML_PARSE_COMPACT;
13524 options -= XML_PARSE_COMPACT;
13525 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013526 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013527 return (options);
13528}
13529
13530/**
13531 * xmlDoRead:
13532 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013533 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013534 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013535 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013536 * @reuse: keep the context for reuse
13537 *
13538 * Common front-end for the xmlRead functions
13539 *
13540 * Returns the resulting document tree or NULL
13541 */
13542static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013543xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13544 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013545{
13546 xmlDocPtr ret;
13547
13548 xmlCtxtUseOptions(ctxt, options);
13549 if (encoding != NULL) {
13550 xmlCharEncodingHandlerPtr hdlr;
13551
13552 hdlr = xmlFindCharEncodingHandler(encoding);
13553 if (hdlr != NULL)
13554 xmlSwitchToEncoding(ctxt, hdlr);
13555 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013556 if ((URL != NULL) && (ctxt->input != NULL) &&
13557 (ctxt->input->filename == NULL))
13558 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013559 xmlParseDocument(ctxt);
13560 if ((ctxt->wellFormed) || ctxt->recovery)
13561 ret = ctxt->myDoc;
13562 else {
13563 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013564 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013565 xmlFreeDoc(ctxt->myDoc);
13566 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013567 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013568 ctxt->myDoc = NULL;
13569 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013570 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013571 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013572
13573 return (ret);
13574}
13575
13576/**
13577 * xmlReadDoc:
13578 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013579 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013580 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013581 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013582 *
13583 * parse an XML in-memory document and build a tree.
13584 *
13585 * Returns the resulting document tree
13586 */
13587xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013588xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013589{
13590 xmlParserCtxtPtr ctxt;
13591
13592 if (cur == NULL)
13593 return (NULL);
13594
13595 ctxt = xmlCreateDocParserCtxt(cur);
13596 if (ctxt == NULL)
13597 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013598 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013599}
13600
13601/**
13602 * xmlReadFile:
13603 * @filename: a file or URL
13604 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013605 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013606 *
13607 * parse an XML file from the filesystem or the network.
13608 *
13609 * Returns the resulting document tree
13610 */
13611xmlDocPtr
13612xmlReadFile(const char *filename, const char *encoding, int options)
13613{
13614 xmlParserCtxtPtr ctxt;
13615
Daniel Veillard61b93382003-11-03 14:28:31 +000013616 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013617 if (ctxt == NULL)
13618 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013619 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013620}
13621
13622/**
13623 * xmlReadMemory:
13624 * @buffer: a pointer to a char array
13625 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013626 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013627 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013628 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013629 *
13630 * parse an XML in-memory document and build a tree.
13631 *
13632 * Returns the resulting document tree
13633 */
13634xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013635xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013636{
13637 xmlParserCtxtPtr ctxt;
13638
13639 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13640 if (ctxt == NULL)
13641 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013642 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013643}
13644
13645/**
13646 * xmlReadFd:
13647 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013648 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013649 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013650 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013651 *
13652 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013653 * NOTE that the file descriptor will not be closed when the
13654 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013655 *
13656 * Returns the resulting document tree
13657 */
13658xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013659xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013660{
13661 xmlParserCtxtPtr ctxt;
13662 xmlParserInputBufferPtr input;
13663 xmlParserInputPtr stream;
13664
13665 if (fd < 0)
13666 return (NULL);
13667
13668 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13669 if (input == NULL)
13670 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013671 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013672 ctxt = xmlNewParserCtxt();
13673 if (ctxt == NULL) {
13674 xmlFreeParserInputBuffer(input);
13675 return (NULL);
13676 }
13677 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13678 if (stream == NULL) {
13679 xmlFreeParserInputBuffer(input);
13680 xmlFreeParserCtxt(ctxt);
13681 return (NULL);
13682 }
13683 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013684 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013685}
13686
13687/**
13688 * xmlReadIO:
13689 * @ioread: an I/O read function
13690 * @ioclose: an I/O close function
13691 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013692 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013693 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013694 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013695 *
13696 * parse an XML document from I/O functions and source and build a tree.
13697 *
13698 * Returns the resulting document tree
13699 */
13700xmlDocPtr
13701xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013702 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013703{
13704 xmlParserCtxtPtr ctxt;
13705 xmlParserInputBufferPtr input;
13706 xmlParserInputPtr stream;
13707
13708 if (ioread == NULL)
13709 return (NULL);
13710
13711 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13712 XML_CHAR_ENCODING_NONE);
13713 if (input == NULL)
13714 return (NULL);
13715 ctxt = xmlNewParserCtxt();
13716 if (ctxt == NULL) {
13717 xmlFreeParserInputBuffer(input);
13718 return (NULL);
13719 }
13720 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13721 if (stream == NULL) {
13722 xmlFreeParserInputBuffer(input);
13723 xmlFreeParserCtxt(ctxt);
13724 return (NULL);
13725 }
13726 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013727 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013728}
13729
13730/**
13731 * xmlCtxtReadDoc:
13732 * @ctxt: an XML parser context
13733 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013734 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013735 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013736 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013737 *
13738 * parse an XML in-memory document and build a tree.
13739 * This reuses the existing @ctxt parser context
13740 *
13741 * Returns the resulting document tree
13742 */
13743xmlDocPtr
13744xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013745 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013746{
13747 xmlParserInputPtr stream;
13748
13749 if (cur == NULL)
13750 return (NULL);
13751 if (ctxt == NULL)
13752 return (NULL);
13753
13754 xmlCtxtReset(ctxt);
13755
13756 stream = xmlNewStringInputStream(ctxt, cur);
13757 if (stream == NULL) {
13758 return (NULL);
13759 }
13760 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013761 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013762}
13763
13764/**
13765 * xmlCtxtReadFile:
13766 * @ctxt: an XML parser context
13767 * @filename: a file or URL
13768 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013769 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013770 *
13771 * parse an XML file from the filesystem or the network.
13772 * This reuses the existing @ctxt parser context
13773 *
13774 * Returns the resulting document tree
13775 */
13776xmlDocPtr
13777xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13778 const char *encoding, int options)
13779{
13780 xmlParserInputPtr stream;
13781
13782 if (filename == NULL)
13783 return (NULL);
13784 if (ctxt == NULL)
13785 return (NULL);
13786
13787 xmlCtxtReset(ctxt);
13788
Daniel Veillard29614c72004-11-26 10:47:26 +000013789 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013790 if (stream == NULL) {
13791 return (NULL);
13792 }
13793 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013794 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013795}
13796
13797/**
13798 * xmlCtxtReadMemory:
13799 * @ctxt: an XML parser context
13800 * @buffer: a pointer to a char array
13801 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013802 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013803 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013804 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013805 *
13806 * parse an XML in-memory document and build a tree.
13807 * This reuses the existing @ctxt parser context
13808 *
13809 * Returns the resulting document tree
13810 */
13811xmlDocPtr
13812xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013813 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013814{
13815 xmlParserInputBufferPtr input;
13816 xmlParserInputPtr stream;
13817
13818 if (ctxt == NULL)
13819 return (NULL);
13820 if (buffer == NULL)
13821 return (NULL);
13822
13823 xmlCtxtReset(ctxt);
13824
13825 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13826 if (input == NULL) {
13827 return(NULL);
13828 }
13829
13830 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13831 if (stream == NULL) {
13832 xmlFreeParserInputBuffer(input);
13833 return(NULL);
13834 }
13835
13836 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013837 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013838}
13839
13840/**
13841 * xmlCtxtReadFd:
13842 * @ctxt: an XML parser context
13843 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013844 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013845 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013846 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013847 *
13848 * parse an XML from a file descriptor and build a tree.
13849 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013850 * NOTE that the file descriptor will not be closed when the
13851 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013852 *
13853 * Returns the resulting document tree
13854 */
13855xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013856xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13857 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013858{
13859 xmlParserInputBufferPtr input;
13860 xmlParserInputPtr stream;
13861
13862 if (fd < 0)
13863 return (NULL);
13864 if (ctxt == NULL)
13865 return (NULL);
13866
13867 xmlCtxtReset(ctxt);
13868
13869
13870 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13871 if (input == NULL)
13872 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013873 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013874 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13875 if (stream == NULL) {
13876 xmlFreeParserInputBuffer(input);
13877 return (NULL);
13878 }
13879 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013880 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013881}
13882
13883/**
13884 * xmlCtxtReadIO:
13885 * @ctxt: an XML parser context
13886 * @ioread: an I/O read function
13887 * @ioclose: an I/O close function
13888 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013889 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013890 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013891 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013892 *
13893 * parse an XML document from I/O functions and source and build a tree.
13894 * This reuses the existing @ctxt parser context
13895 *
13896 * Returns the resulting document tree
13897 */
13898xmlDocPtr
13899xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13900 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013901 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013902 const char *encoding, int options)
13903{
13904 xmlParserInputBufferPtr input;
13905 xmlParserInputPtr stream;
13906
13907 if (ioread == NULL)
13908 return (NULL);
13909 if (ctxt == NULL)
13910 return (NULL);
13911
13912 xmlCtxtReset(ctxt);
13913
13914 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13915 XML_CHAR_ENCODING_NONE);
13916 if (input == NULL)
13917 return (NULL);
13918 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13919 if (stream == NULL) {
13920 xmlFreeParserInputBuffer(input);
13921 return (NULL);
13922 }
13923 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013924 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013925}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013926
13927#define bottom_parser
13928#include "elfgcchack.h"