blob: e095ab883ecedcea7447277d06842591ce2bcfbd [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process. It is a safety boundary, not a limitation of the parser
 * itself.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

/* Sizes of the "big" and regular parser buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The list is terminated by a NULL entry. Both the strings and the
 * table slots are const: this is read-only data.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
 * Returns a non-zero value if the feature exists; zero (0) if it does
 * not exist or if an unknown feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1462 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
/*
 * Byte accessors on the current input of the parser context named `ctxt`
 * at the expansion site. RAW and CUR both read the byte under the cursor,
 * NXT(val) reads the byte val positions further, CUR_PTR is the cursor
 * itself (and stays assignable).
 */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
1496
/*
 * CMPn(s, c1, ..., cn) is non-zero when the first n bytes found at s are
 * exactly c1 ... cn. Bytes are read as unsigned char values and the
 * comparison stops at the first mismatch, so nothing past it is read.
 * Every parameter is parenthesized so that any expression can be passed,
 * but s is evaluated several times: do not pass one with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): move the cursor val bytes forward, adding val to the
 * character count and to the column. Afterwards a '%' under the cursor is
 * handed to xmlParserHandlePEReference(), and when the cursor sits on a 0
 * byte and the input cannot be grown the current input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (ctxt->input->cur[0] == '%') \
        xmlParserHandlePEReference(ctxt); \
    if (ctxt->input->cur[0] == 0) { \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) \
            xmlPopInput(ctxt); \
    } \
  } while (0)
1522
/*
 * SKIPL(val): move the cursor val bytes forward one byte at a time so that
 * newlines are accounted for: a '\n' bumps the line and resets the column
 * to 1, any other byte bumps the column. The character count grows by one
 * per byte. Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and when the cursor sits on a 0 byte and
 * the input cannot be grown the current input is popped.
 * val is parenthesized so that any expression can be passed; it is
 * re-evaluated on every loop iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* Skip the blanks at the cursor; expands to a call to xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character; expands to a call to xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor, the column and the character count by one,
 * then try to grow the input if the cursor landed on a 0 byte.
 * The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long. A '\n'
 * bumps the line and resets the column to 1, anything else bumps the
 * column. A '%' under the new cursor position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* Current character of the context input; its byte length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same, reading from the string s instead of the context input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. When l is 1 the value is stored as a single xmlChar, otherwise it is
 * written through xmlCopyCharMultiByte(). The expansion is a bare if/else
 * without a trailing semicolon: use it as a statement of its own, inside
 * braces when it is the body of another if.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * growBuffer(buffer): double the capacity of `buffer`, whose current
 * capacity lives in the companion variable `buffer_size` (the name is
 * built by token pasting). On reallocation failure the macro jumps to the
 * `mem_error` label of the enclosing function and `buffer` keeps pointing
 * to the old block. The expansion is a brace block.
 */
#define growBuffer(buffer) { \
    xmlChar *grown; \
    buffer##_size = buffer##_size * 2; \
    grown = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (grown == NULL) \
        goto mem_error; \
    buffer = grown; \
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002499 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002500 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002512 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002513 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002515 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002516 }
Owen Taylor3473f882001-02-23 17:55:21 +00002517 len = 0;
2518
Daniel Veillardbb284f42002-10-16 18:02:47 +00002519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002532 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002533 }
2534 }
2535 cur++;
2536
Owen Taylor3473f882001-02-23 17:55:21 +00002537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002550 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 xmlChar *tmp;
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002560 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 return(NULL);
2565 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002566 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
2584/************************************************************************
2585 * *
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2588 * *
2589 ************************************************************************/
2590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002593 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002594
Owen Taylor3473f882001-02-23 17:55:21 +00002595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002606 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002612xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002613 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002614 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002615 int count = 0;
2616
2617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002633 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002636 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002641 return(ret);
2642 }
2643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002670 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 }
William M. Brack76e95df2003-10-18 16:20:14 +00002672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 /* success */
2674 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002675 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002682 }
2683 return ret;
2684}
2685
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002686static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002713 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 NEXTL(l);
2715 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Daniel Veillard96688262005-08-23 18:14:12 +00002717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002734 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002736 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * is updated to the current location in the string.
2738 */
2739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002740static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002748 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002749 (c != ':')) {
2750 return(NULL);
2751 }
2752
William M. Brack871611b2003-10-18 04:53:14 +00002753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002776 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002785 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 return(NULL);
2790 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002791 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
William M. Brack871611b2003-10-18 04:53:14 +00002829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 xmlChar *tmp;
2866
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002869 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 return(NULL);
2874 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002875 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002922 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
William M. Brack871611b2003-10-18 04:53:14 +00002944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 xmlChar *tmp;
2948
Owen Taylor3473f882001-02-23 17:55:21 +00002949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002990 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002991 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 }
2997 if (name != NULL)
2998 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002999 if (*cur == 0)
3000 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
 * @attlen: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003038 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003042static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 int len = 0;
3047 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003071
3072 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003073 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003074 */
3075 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003077 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003079 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003207 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003211 if ((c != 0) && (!IS_CHAR(c))) {
3212 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3213 "invalid character in attribute value\n");
3214 } else {
3215 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3216 "AttValue: ' expected\n");
3217 }
Owen Taylor3473f882001-02-23 17:55:21 +00003218 } else
3219 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003220 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003221 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003222
3223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003224 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003226}
3227
3228/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003229 * xmlParseAttValue:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse a value for an attribute
3233 * Note: the parser won't do substitution of entities here, this
3234 * will be handled later in xmlStringGetNodeList
3235 *
3236 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3237 * "'" ([^<&'] | Reference)* "'"
3238 *
3239 * 3.3.3 Attribute-Value Normalization:
3240 * Before the value of an attribute is passed to the application or
3241 * checked for validity, the XML processor must normalize it as follows:
3242 * - a character reference is processed by appending the referenced
3243 * character to the attribute value
3244 * - an entity reference is processed by recursively processing the
3245 * replacement text of the entity
3246 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3247 * appending #x20 to the normalized value, except that only a single
3248 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3249 * parsed entity or the literal entity value of an internal parsed entity
3250 * - other characters are processed by appending them to the normalized value
3251 * If the declared value is not CDATA, then the XML processor must further
3252 * process the normalized attribute value by discarding any leading and
3253 * trailing space (#x20) characters, and by replacing sequences of space
3254 * (#x20) characters by a single space (#x20) character.
3255 * All attributes for which no declaration has been read should be treated
3256 * by a non-validating parser as if declared CDATA.
3257 *
3258 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3259 */
3260
3261
3262xmlChar *
3263xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003264 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003265 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003266}
3267
3268/**
Owen Taylor3473f882001-02-23 17:55:21 +00003269 * xmlParseSystemLiteral:
3270 * @ctxt: an XML parser context
3271 *
3272 * parse an XML Literal
3273 *
3274 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3275 *
3276 * Returns the SystemLiteral parsed or NULL
3277 */
3278
3279xmlChar *
3280xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3281 xmlChar *buf = NULL;
3282 int len = 0;
3283 int size = XML_PARSER_BUFFER_SIZE;
3284 int cur, l;
3285 xmlChar stop;
3286 int state = ctxt->instate;
3287 int count = 0;
3288
3289 SHRINK;
3290 if (RAW == '"') {
3291 NEXT;
3292 stop = '"';
3293 } else if (RAW == '\'') {
3294 NEXT;
3295 stop = '\'';
3296 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003297 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003298 return(NULL);
3299 }
3300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003301 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003302 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003304 return(NULL);
3305 }
3306 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3307 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003308 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003309 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003310 xmlChar *tmp;
3311
Owen Taylor3473f882001-02-23 17:55:21 +00003312 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003313 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3314 if (tmp == NULL) {
3315 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003316 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003317 ctxt->instate = (xmlParserInputState) state;
3318 return(NULL);
3319 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003320 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003321 }
3322 count++;
3323 if (count > 50) {
3324 GROW;
3325 count = 0;
3326 }
3327 COPY_BUF(l,buf,len,cur);
3328 NEXTL(l);
3329 cur = CUR_CHAR(l);
3330 if (cur == 0) {
3331 GROW;
3332 SHRINK;
3333 cur = CUR_CHAR(l);
3334 }
3335 }
3336 buf[len] = 0;
3337 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003338 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003339 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003340 } else {
3341 NEXT;
3342 }
3343 return(buf);
3344}
3345
3346/**
3347 * xmlParsePubidLiteral:
3348 * @ctxt: an XML parser context
3349 *
3350 * parse an XML public literal
3351 *
3352 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3353 *
3354 * Returns the PubidLiteral parsed or NULL.
3355 */
3356
3357xmlChar *
3358xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3359 xmlChar *buf = NULL;
3360 int len = 0;
3361 int size = XML_PARSER_BUFFER_SIZE;
3362 xmlChar cur;
3363 xmlChar stop;
3364 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003365 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003366
3367 SHRINK;
3368 if (RAW == '"') {
3369 NEXT;
3370 stop = '"';
3371 } else if (RAW == '\'') {
3372 NEXT;
3373 stop = '\'';
3374 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003378 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003379 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003380 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003381 return(NULL);
3382 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003383 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003384 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003385 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003386 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003387 xmlChar *tmp;
3388
Owen Taylor3473f882001-02-23 17:55:21 +00003389 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003390 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3391 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003392 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003393 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003394 return(NULL);
3395 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003396 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003397 }
3398 buf[len++] = cur;
3399 count++;
3400 if (count > 50) {
3401 GROW;
3402 count = 0;
3403 }
3404 NEXT;
3405 cur = CUR;
3406 if (cur == 0) {
3407 GROW;
3408 SHRINK;
3409 cur = CUR;
3410 }
3411 }
3412 buf[len] = 0;
3413 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003415 } else {
3416 NEXT;
3417 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003418 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003419 return(buf);
3420}
3421
/* forward declaration: defined after xmlParseCharData() which calls it */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by a byte value: a non-zero entry (the byte value
 * itself) means the inner loop of xmlParseCharData() can step over that
 * byte, a zero entry stops the loop.  The zero entries are the control
 * characters other than 0x9 (so 0xA and 0xD stop the loop too), '&',
 * '<', ']' and every byte from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3461
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of bytes accepted by test_char_data[] straight to the SAX
 * callbacks. Anything the fast path does not handle falls through to
 * xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position to restore before handing over to the complex parser */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
	in = ctxt->input->cur;
	do {
get_more_space:
	    /* step over a leading run of spaces and line feeds */
	    while (*in == 0x20) in++;
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more_space;
	    }
	    if (*in == '<') {
		/*
		 * Only spaces and line feeds were seen before the next
		 * markup: deliver them (if any) and return.
		 */
		nbchar = in - ctxt->input->cur;
		if (nbchar > 0) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if ((ctxt->sax != NULL) &&
		        (ctxt->sax->ignorableWhitespace !=
		         ctxt->sax->characters)) {
			/* distinct callbacks: let areBlanks() pick one */
			if (areBlanks(ctxt, tmp, nbchar, 1)) {
			    if (ctxt->sax->ignorableWhitespace != NULL)
				ctxt->sax->ignorableWhitespace(ctxt->userData,
							       tmp, nbchar);
			} else {
			    if (ctxt->sax->characters != NULL)
				ctxt->sax->characters(ctxt->userData,
						      tmp, nbchar);
			    if (*ctxt->space == -1)
				*ctxt->space = -2;
			}
		    } else if ((ctxt->sax != NULL) &&
		               (ctxt->sax->characters != NULL)) {
			ctxt->sax->characters(ctxt->userData,
					      tmp, nbchar);
		    }
		}
		return;
	    }

get_more:
	    /* count columns locally while skipping table-accepted bytes */
	    ccol = ctxt->input->col;
	    while (test_char_data[*in]) {
		in++;
		ccol++;
	    }
	    ctxt->input->col = ccol;
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more;
	    }
	    if (*in == ']') {
		/* "]]>" is an error here; a lone ']' is plain content */
		if ((in[1] == ']') && (in[2] == '>')) {
		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
		    ctxt->input->cur = in;
		    return;
		}
		in++;
		ctxt->input->col++;
		goto get_more;
	    }
	    /* deliver the run gathered since ctxt->input->cur */
	    nbchar = in - ctxt->input->cur;
	    if (nbchar > 0) {
		if ((ctxt->sax != NULL) &&
		    (ctxt->sax->ignorableWhitespace !=
		     ctxt->sax->characters) &&
		    (IS_BLANK_CH(*ctxt->input->cur))) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
			if (ctxt->sax->ignorableWhitespace != NULL)
			    ctxt->sax->ignorableWhitespace(ctxt->userData,
							   tmp, nbchar);
		    } else {
			if (ctxt->sax->characters != NULL)
			    ctxt->sax->characters(ctxt->userData,
						  tmp, nbchar);
			if (*ctxt->space == -1)
			    *ctxt->space = -2;
		    }
		    /* data was consumed: remember the new position */
		    line = ctxt->input->line;
		    col = ctxt->input->col;
		} else if (ctxt->sax != NULL) {
		    if (ctxt->sax->characters != NULL)
			ctxt->sax->characters(ctxt->userData,
					      ctxt->input->cur, nbchar);
		    line = ctxt->input->line;
		    col = ctxt->input->col;
		}
	    }
	    ctxt->input->cur = in;
	    if (*in == 0xD) {
		in++;
		if (*in == 0xA) {
		    /*
		     * CR LF pair: move cur onto the LF so the CR is left
		     * out of the next run, then go on scanning after it.
		     */
		    ctxt->input->cur = in;
		    in++;
		    ctxt->input->line++; ctxt->input->col = 1;
		    continue; /* while */
		}
		/* lone CR: step back, not handled by the fast path */
		in--;
	    }
	    if (*in == '<') {
		return;
	    }
	    if (*in == '&') {
		return;
	    }
	    SHRINK;
	    GROW;
	    /* reload: in is refreshed from the input after SHRINK/GROW */
	    in = ctxt->input->cur;
	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
	nbchar = 0;
    }
    /* restore the position of the first byte not yet delivered */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
3614
Daniel Veillard01c13b52002-12-10 15:19:08 +00003615/**
3616 * xmlParseCharDataComplex:
3617 * @ctxt: an XML parser context
3618 * @cdata: int indicating whether we are within a CDATA section
3619 *
3620 * parse a CharData section.this is the fallback function
3621 * of xmlParseCharData() when the parsing requires handling
3622 * of non-ASCII characters.
3623 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003624void
3625xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003626 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3627 int nbchar = 0;
3628 int cur, l;
3629 int count = 0;
3630
3631 SHRINK;
3632 GROW;
3633 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003634 while ((cur != '<') && /* checked */
3635 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003636 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003637 if ((cur == ']') && (NXT(1) == ']') &&
3638 (NXT(2) == '>')) {
3639 if (cdata) break;
3640 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003641 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003642 }
3643 }
3644 COPY_BUF(l,buf,nbchar,cur);
3645 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003646 buf[nbchar] = 0;
3647
Owen Taylor3473f882001-02-23 17:55:21 +00003648 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003649 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003650 */
3651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003652 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (ctxt->sax->ignorableWhitespace != NULL)
3654 ctxt->sax->ignorableWhitespace(ctxt->userData,
3655 buf, nbchar);
3656 } else {
3657 if (ctxt->sax->characters != NULL)
3658 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003659 if ((ctxt->sax->characters !=
3660 ctxt->sax->ignorableWhitespace) &&
3661 (*ctxt->space == -1))
3662 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003663 }
3664 }
3665 nbchar = 0;
3666 }
3667 count++;
3668 if (count > 50) {
3669 GROW;
3670 count = 0;
3671 }
3672 NEXTL(l);
3673 cur = CUR_CHAR(l);
3674 }
3675 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003676 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003677 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003678 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003679 */
3680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003681 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003682 if (ctxt->sax->ignorableWhitespace != NULL)
3683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3684 } else {
3685 if (ctxt->sax->characters != NULL)
3686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003687 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3688 (*ctxt->space == -1))
3689 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003690 }
3691 }
3692 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003693 if ((cur != 0) && (!IS_CHAR(cur))) {
3694 /* Generate the error and skip the offending character */
3695 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3696 "PCDATA invalid Char value %d\n",
3697 cur);
3698 NEXTL(l);
3699 }
Owen Taylor3473f882001-02-23 17:55:21 +00003700}
3701
3702/**
3703 * xmlParseExternalID:
3704 * @ctxt: an XML parser context
3705 * @publicID: a xmlChar** receiving PubidLiteral
3706 * @strict: indicate whether we should restrict parsing to only
3707 * production [75], see NOTE below
3708 *
3709 * Parse an External ID or a Public ID
3710 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003711 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003712 * 'PUBLIC' S PubidLiteral S SystemLiteral
3713 *
3714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3715 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3716 *
3717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3718 *
3719 * Returns the function returns SystemLiteral and in the second
3720 * case publicID receives PubidLiteral, is strict is off
3721 * it is possible to return NULL and have publicID set.
3722 */
3723
3724xmlChar *
3725xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3726 xmlChar *URI = NULL;
3727
3728 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003729
3730 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003731 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003732 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003733 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3735 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
3737 SKIP_BLANKS;
3738 URI = xmlParseSystemLiteral(ctxt);
3739 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003740 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003741 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003742 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003743 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003744 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003746 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 SKIP_BLANKS;
3749 *publicID = xmlParsePubidLiteral(ctxt);
3750 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003751 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 if (strict) {
3754 /*
3755 * We don't handle [83] so "S SystemLiteral" is required.
3756 */
William M. Brack76e95df2003-10-18 16:20:14 +00003757 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003759 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003760 }
3761 } else {
3762 /*
3763 * We handle [83] so we return immediately, if
3764 * "S SystemLiteral" is not detected. From a purely parsing
3765 * point of view that's a nice mess.
3766 */
3767 const xmlChar *ptr;
3768 GROW;
3769
3770 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003771 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003772
William M. Brack76e95df2003-10-18 16:20:14 +00003773 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003774 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3775 }
3776 SKIP_BLANKS;
3777 URI = xmlParseSystemLiteral(ctxt);
3778 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003779 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003780 }
3781 }
3782 return(URI);
3783}
3784
3785/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003786 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003787 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003788 * @buf: the already parsed part of the buffer
3789 * @len: number of bytes filles in the buffer
3790 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003791 *
3792 * Skip an XML (SGML) comment <!-- .... -->
3793 * The spec says that "For compatibility, the string "--" (double-hyphen)
3794 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003795 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003796 *
3797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3798 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003799static void
3800xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003801 int q, ql;
3802 int r, rl;
3803 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003804 xmlParserInputPtr input = ctxt->input;
3805 int count = 0;
3806
Owen Taylor3473f882001-02-23 17:55:21 +00003807 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003808 len = 0;
3809 size = XML_PARSER_BUFFER_SIZE;
3810 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3811 if (buf == NULL) {
3812 xmlErrMemory(ctxt, NULL);
3813 return;
3814 }
Owen Taylor3473f882001-02-23 17:55:21 +00003815 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003816 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003817 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003818 if (q == 0)
3819 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003820 NEXTL(ql);
3821 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003822 if (r == 0)
3823 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003824 NEXTL(rl);
3825 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003826 if (cur == 0)
3827 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003828 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003829 ((cur != '>') ||
3830 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003831 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003832 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003833 }
3834 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003835 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003836 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003837 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3838 if (new_buf == NULL) {
3839 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003840 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003841 return;
3842 }
William M. Bracka3215c72004-07-31 16:24:01 +00003843 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003844 }
3845 COPY_BUF(ql,buf,len,q);
3846 q = r;
3847 ql = rl;
3848 r = cur;
3849 rl = l;
3850
3851 count++;
3852 if (count > 50) {
3853 GROW;
3854 count = 0;
3855 }
3856 NEXTL(l);
3857 cur = CUR_CHAR(l);
3858 if (cur == 0) {
3859 SHRINK;
3860 GROW;
3861 cur = CUR_CHAR(l);
3862 }
3863 }
3864 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003865 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003866 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003867 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003868 xmlFree(buf);
3869 } else {
3870 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003871 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3872 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003873 }
3874 NEXT;
3875 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3876 (!ctxt->disableSAX))
3877 ctxt->sax->comment(ctxt->userData, buf);
3878 xmlFree(buf);
3879 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003880 return;
3881not_terminated:
3882 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3883 "Comment not terminated\n", NULL);
3884 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003885}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003886/**
3887 * xmlParseComment:
3888 * @ctxt: an XML parser context
3889 *
3890 * Skip an XML (SGML) comment <!-- .... -->
3891 * The spec says that "For compatibility, the string "--" (double-hyphen)
3892 * must not occur within comments. "
3893 *
3894 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3895 */
3896void
3897xmlParseComment(xmlParserCtxtPtr ctxt) {
3898 xmlChar *buf = NULL;
3899 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003900 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003901 xmlParserInputState state;
3902 const xmlChar *in;
3903 int nbchar = 0, ccol;
3904
3905 /*
3906 * Check that there is a comment right here.
3907 */
3908 if ((RAW != '<') || (NXT(1) != '!') ||
3909 (NXT(2) != '-') || (NXT(3) != '-')) return;
3910
3911 state = ctxt->instate;
3912 ctxt->instate = XML_PARSER_COMMENT;
3913 SKIP(4);
3914 SHRINK;
3915 GROW;
3916
3917 /*
3918 * Accelerated common case where input don't need to be
3919 * modified before passing it to the handler.
3920 */
3921 in = ctxt->input->cur;
3922 do {
3923 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003924 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003925 ctxt->input->line++; ctxt->input->col = 1;
3926 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003927 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003928 }
3929get_more:
3930 ccol = ctxt->input->col;
3931 while (((*in > '-') && (*in <= 0x7F)) ||
3932 ((*in >= 0x20) && (*in < '-')) ||
3933 (*in == 0x09)) {
3934 in++;
3935 ccol++;
3936 }
3937 ctxt->input->col = ccol;
3938 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003939 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003940 ctxt->input->line++; ctxt->input->col = 1;
3941 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003942 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003943 goto get_more;
3944 }
3945 nbchar = in - ctxt->input->cur;
3946 /*
3947 * save current set of data
3948 */
3949 if (nbchar > 0) {
3950 if ((ctxt->sax != NULL) &&
3951 (ctxt->sax->comment != NULL)) {
3952 if (buf == NULL) {
3953 if ((*in == '-') && (in[1] == '-'))
3954 size = nbchar + 1;
3955 else
3956 size = XML_PARSER_BUFFER_SIZE + nbchar;
3957 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3958 if (buf == NULL) {
3959 xmlErrMemory(ctxt, NULL);
3960 ctxt->instate = state;
3961 return;
3962 }
3963 len = 0;
3964 } else if (len + nbchar + 1 >= size) {
3965 xmlChar *new_buf;
3966 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3967 new_buf = (xmlChar *) xmlRealloc(buf,
3968 size * sizeof(xmlChar));
3969 if (new_buf == NULL) {
3970 xmlFree (buf);
3971 xmlErrMemory(ctxt, NULL);
3972 ctxt->instate = state;
3973 return;
3974 }
3975 buf = new_buf;
3976 }
3977 memcpy(&buf[len], ctxt->input->cur, nbchar);
3978 len += nbchar;
3979 buf[len] = 0;
3980 }
3981 }
3982 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003983 if (*in == 0xA) {
3984 in++;
3985 ctxt->input->line++; ctxt->input->col = 1;
3986 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003987 if (*in == 0xD) {
3988 in++;
3989 if (*in == 0xA) {
3990 ctxt->input->cur = in;
3991 in++;
3992 ctxt->input->line++; ctxt->input->col = 1;
3993 continue; /* while */
3994 }
3995 in--;
3996 }
3997 SHRINK;
3998 GROW;
3999 in = ctxt->input->cur;
4000 if (*in == '-') {
4001 if (in[1] == '-') {
4002 if (in[2] == '>') {
4003 SKIP(3);
4004 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4005 (!ctxt->disableSAX)) {
4006 if (buf != NULL)
4007 ctxt->sax->comment(ctxt->userData, buf);
4008 else
4009 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4010 }
4011 if (buf != NULL)
4012 xmlFree(buf);
4013 ctxt->instate = state;
4014 return;
4015 }
4016 if (buf != NULL)
4017 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4018 "Comment not terminated \n<!--%.50s\n",
4019 buf);
4020 else
4021 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4022 "Comment not terminated \n", NULL);
4023 in++;
4024 ctxt->input->col++;
4025 }
4026 in++;
4027 ctxt->input->col++;
4028 goto get_more;
4029 }
4030 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4031 xmlParseCommentComplex(ctxt, buf, len, size);
4032 ctxt->instate = state;
4033 return;
4034}
4035
Owen Taylor3473f882001-02-23 17:55:21 +00004036
4037/**
4038 * xmlParsePITarget:
4039 * @ctxt: an XML parser context
4040 *
4041 * parse the name of a PI
4042 *
4043 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4044 *
4045 * Returns the PITarget name or NULL
4046 */
4047
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004048const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004049xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004050 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004051
4052 name = xmlParseName(ctxt);
4053 if ((name != NULL) &&
4054 ((name[0] == 'x') || (name[0] == 'X')) &&
4055 ((name[1] == 'm') || (name[1] == 'M')) &&
4056 ((name[2] == 'l') || (name[2] == 'L'))) {
4057 int i;
4058 if ((name[0] == 'x') && (name[1] == 'm') &&
4059 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004060 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004061 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004062 return(name);
4063 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004064 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004065 return(name);
4066 }
4067 for (i = 0;;i++) {
4068 if (xmlW3CPIs[i] == NULL) break;
4069 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4070 return(name);
4071 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004072 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4073 "xmlParsePITarget: invalid name prefix 'xml'\n",
4074 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004075 }
4076 return(name);
4077}
4078
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The catalog URL found is added to the parsing context so that it can
 * be used if a resolution is needed further down in the document.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect the pseudo-attribute name "catalog". */
    while (IS_BLANK_CH(*p)) p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* A missing '=' is ignored without any warning. */
    while (IS_BLANK_CH(*p)) p++;
    if (*p != '=')
        return;
    p++;

    /* The value must be quoted with ' or ". */
    while (IS_BLANK_CH(*p)) p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Find the matching closing quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the value. */
    while (IS_BLANK_CH(*p)) p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4140
Owen Taylor3473f882001-02-23 17:55:21 +00004141/**
4142 * xmlParsePI:
4143 * @ctxt: an XML parser context
4144 *
4145 * parse an XML Processing Instruction.
4146 *
4147 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4148 *
4149 * The processing is transfered to SAX once parsed.
4150 */
4151
4152void
4153xmlParsePI(xmlParserCtxtPtr ctxt) {
4154 xmlChar *buf = NULL;
4155 int len = 0;
4156 int size = XML_PARSER_BUFFER_SIZE;
4157 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004158 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004159 xmlParserInputState state;
4160 int count = 0;
4161
4162 if ((RAW == '<') && (NXT(1) == '?')) {
4163 xmlParserInputPtr input = ctxt->input;
4164 state = ctxt->instate;
4165 ctxt->instate = XML_PARSER_PI;
4166 /*
4167 * this is a Processing Instruction.
4168 */
4169 SKIP(2);
4170 SHRINK;
4171
4172 /*
4173 * Parse the target name and check for special support like
4174 * namespace.
4175 */
4176 target = xmlParsePITarget(ctxt);
4177 if (target != NULL) {
4178 if ((RAW == '?') && (NXT(1) == '>')) {
4179 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004180 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4181 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004182 }
4183 SKIP(2);
4184
4185 /*
4186 * SAX: PI detected.
4187 */
4188 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4189 (ctxt->sax->processingInstruction != NULL))
4190 ctxt->sax->processingInstruction(ctxt->userData,
4191 target, NULL);
4192 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004193 return;
4194 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004195 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004196 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004198 ctxt->instate = state;
4199 return;
4200 }
4201 cur = CUR;
4202 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004203 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4204 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004205 }
4206 SKIP_BLANKS;
4207 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004208 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004209 ((cur != '?') || (NXT(1) != '>'))) {
4210 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004211 xmlChar *tmp;
4212
Owen Taylor3473f882001-02-23 17:55:21 +00004213 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004214 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4215 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004216 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004217 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004218 ctxt->instate = state;
4219 return;
4220 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004221 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004222 }
4223 count++;
4224 if (count > 50) {
4225 GROW;
4226 count = 0;
4227 }
4228 COPY_BUF(l,buf,len,cur);
4229 NEXTL(l);
4230 cur = CUR_CHAR(l);
4231 if (cur == 0) {
4232 SHRINK;
4233 GROW;
4234 cur = CUR_CHAR(l);
4235 }
4236 }
4237 buf[len] = 0;
4238 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004239 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4240 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004241 } else {
4242 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004243 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4244 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004245 }
4246 SKIP(2);
4247
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004248#ifdef LIBXML_CATALOG_ENABLED
4249 if (((state == XML_PARSER_MISC) ||
4250 (state == XML_PARSER_START)) &&
4251 (xmlStrEqual(target, XML_CATALOG_PI))) {
4252 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4253 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4254 (allow == XML_CATA_ALLOW_ALL))
4255 xmlParseCatalogPI(ctxt, buf);
4256 }
4257#endif
4258
4259
Owen Taylor3473f882001-02-23 17:55:21 +00004260 /*
4261 * SAX: PI detected.
4262 */
4263 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4264 (ctxt->sax->processingInstruction != NULL))
4265 ctxt->sax->processingInstruction(ctxt->userData,
4266 target, buf);
4267 }
4268 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004270 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004271 }
4272 ctxt->instate = state;
4273 }
4274}
4275
4276/**
4277 * xmlParseNotationDecl:
4278 * @ctxt: an XML parser context
4279 *
4280 * parse a notation declaration
4281 *
4282 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4283 *
4284 * Hence there is actually 3 choices:
4285 * 'PUBLIC' S PubidLiteral
4286 * 'PUBLIC' S PubidLiteral S SystemLiteral
4287 * and 'SYSTEM' S SystemLiteral
4288 *
4289 * See the NOTE on xmlParseExternalID().
4290 */
4291
4292void
4293xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004294 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004295 xmlChar *Pubid;
4296 xmlChar *Systemid;
4297
Daniel Veillarda07050d2003-10-19 14:46:32 +00004298 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004299 xmlParserInputPtr input = ctxt->input;
4300 SHRINK;
4301 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004302 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004303 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4304 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004305 return;
4306 }
4307 SKIP_BLANKS;
4308
Daniel Veillard76d66f42001-05-16 21:05:17 +00004309 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004311 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004312 return;
4313 }
William M. Brack76e95df2003-10-18 16:20:14 +00004314 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004315 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004316 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004317 return;
4318 }
4319 SKIP_BLANKS;
4320
4321 /*
4322 * Parse the IDs.
4323 */
4324 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4325 SKIP_BLANKS;
4326
4327 if (RAW == '>') {
4328 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4330 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004331 }
4332 NEXT;
4333 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4334 (ctxt->sax->notationDecl != NULL))
4335 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4336 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004337 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
Owen Taylor3473f882001-02-23 17:55:21 +00004339 if (Systemid != NULL) xmlFree(Systemid);
4340 if (Pubid != NULL) xmlFree(Pubid);
4341 }
4342}
4343
4344/**
4345 * xmlParseEntityDecl:
4346 * @ctxt: an XML parser context
4347 *
4348 * parse <!ENTITY declarations
4349 *
4350 * [70] EntityDecl ::= GEDecl | PEDecl
4351 *
4352 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4353 *
4354 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4355 *
4356 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4357 *
4358 * [74] PEDef ::= EntityValue | ExternalID
4359 *
4360 * [76] NDataDecl ::= S 'NDATA' S Name
4361 *
4362 * [ VC: Notation Declared ]
4363 * The Name must match the declared name of a notation.
4364 */
4365
4366void
4367xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004368 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004369 xmlChar *value = NULL;
4370 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004371 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004372 int isParameter = 0;
4373 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004374 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004375
Daniel Veillard4c778d82005-01-23 17:37:44 +00004376 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004377 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004378 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 SHRINK;
4380 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004381 skipped = SKIP_BLANKS;
4382 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004383 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4384 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004385 }
Owen Taylor3473f882001-02-23 17:55:21 +00004386
4387 if (RAW == '%') {
4388 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004389 skipped = SKIP_BLANKS;
4390 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004391 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4392 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004393 }
Owen Taylor3473f882001-02-23 17:55:21 +00004394 isParameter = 1;
4395 }
4396
Daniel Veillard76d66f42001-05-16 21:05:17 +00004397 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004398 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004399 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4400 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004401 return;
4402 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004403 skipped = SKIP_BLANKS;
4404 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004405 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4406 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004407 }
Owen Taylor3473f882001-02-23 17:55:21 +00004408
Daniel Veillardf5582f12002-06-11 10:08:16 +00004409 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004410 /*
4411 * handle the various case of definitions...
4412 */
4413 if (isParameter) {
4414 if ((RAW == '"') || (RAW == '\'')) {
4415 value = xmlParseEntityValue(ctxt, &orig);
4416 if (value) {
4417 if ((ctxt->sax != NULL) &&
4418 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4419 ctxt->sax->entityDecl(ctxt->userData, name,
4420 XML_INTERNAL_PARAMETER_ENTITY,
4421 NULL, NULL, value);
4422 }
4423 } else {
4424 URI = xmlParseExternalID(ctxt, &literal, 1);
4425 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004426 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004427 }
4428 if (URI) {
4429 xmlURIPtr uri;
4430
4431 uri = xmlParseURI((const char *) URI);
4432 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004433 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4434 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004435 /*
4436 * This really ought to be a well formedness error
4437 * but the XML Core WG decided otherwise c.f. issue
4438 * E26 of the XML erratas.
4439 */
Owen Taylor3473f882001-02-23 17:55:21 +00004440 } else {
4441 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004442 /*
4443 * Okay this is foolish to block those but not
4444 * invalid URIs.
4445 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004446 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 } else {
4448 if ((ctxt->sax != NULL) &&
4449 (!ctxt->disableSAX) &&
4450 (ctxt->sax->entityDecl != NULL))
4451 ctxt->sax->entityDecl(ctxt->userData, name,
4452 XML_EXTERNAL_PARAMETER_ENTITY,
4453 literal, URI, NULL);
4454 }
4455 xmlFreeURI(uri);
4456 }
4457 }
4458 }
4459 } else {
4460 if ((RAW == '"') || (RAW == '\'')) {
4461 value = xmlParseEntityValue(ctxt, &orig);
4462 if ((ctxt->sax != NULL) &&
4463 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4464 ctxt->sax->entityDecl(ctxt->userData, name,
4465 XML_INTERNAL_GENERAL_ENTITY,
4466 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004467 /*
4468 * For expat compatibility in SAX mode.
4469 */
4470 if ((ctxt->myDoc == NULL) ||
4471 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4472 if (ctxt->myDoc == NULL) {
4473 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4474 }
4475 if (ctxt->myDoc->intSubset == NULL)
4476 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4477 BAD_CAST "fake", NULL, NULL);
4478
Daniel Veillard1af9a412003-08-20 22:54:39 +00004479 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4480 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004481 }
Owen Taylor3473f882001-02-23 17:55:21 +00004482 } else {
4483 URI = xmlParseExternalID(ctxt, &literal, 1);
4484 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004485 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004486 }
4487 if (URI) {
4488 xmlURIPtr uri;
4489
4490 uri = xmlParseURI((const char *)URI);
4491 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004492 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4493 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004494 /*
4495 * This really ought to be a well formedness error
4496 * but the XML Core WG decided otherwise c.f. issue
4497 * E26 of the XML erratas.
4498 */
Owen Taylor3473f882001-02-23 17:55:21 +00004499 } else {
4500 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004501 /*
4502 * Okay this is foolish to block those but not
4503 * invalid URIs.
4504 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004505 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004506 }
4507 xmlFreeURI(uri);
4508 }
4509 }
William M. Brack76e95df2003-10-18 16:20:14 +00004510 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004511 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4512 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004513 }
4514 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004515 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004516 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004517 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004518 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4519 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004520 }
4521 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004522 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004523 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4524 (ctxt->sax->unparsedEntityDecl != NULL))
4525 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4526 literal, URI, ndata);
4527 } else {
4528 if ((ctxt->sax != NULL) &&
4529 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4530 ctxt->sax->entityDecl(ctxt->userData, name,
4531 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4532 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004533 /*
4534 * For expat compatibility in SAX mode.
4535 * assuming the entity repalcement was asked for
4536 */
4537 if ((ctxt->replaceEntities != 0) &&
4538 ((ctxt->myDoc == NULL) ||
4539 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4540 if (ctxt->myDoc == NULL) {
4541 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4542 }
4543
4544 if (ctxt->myDoc->intSubset == NULL)
4545 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4546 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004547 xmlSAX2EntityDecl(ctxt, name,
4548 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4549 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004550 }
Owen Taylor3473f882001-02-23 17:55:21 +00004551 }
4552 }
4553 }
4554 SKIP_BLANKS;
4555 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004556 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004557 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004558 } else {
4559 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004560 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4561 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004562 }
4563 NEXT;
4564 }
4565 if (orig != NULL) {
4566 /*
4567 * Ugly mechanism to save the raw entity value.
4568 */
4569 xmlEntityPtr cur = NULL;
4570
4571 if (isParameter) {
4572 if ((ctxt->sax != NULL) &&
4573 (ctxt->sax->getParameterEntity != NULL))
4574 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4575 } else {
4576 if ((ctxt->sax != NULL) &&
4577 (ctxt->sax->getEntity != NULL))
4578 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004579 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004580 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004581 }
Owen Taylor3473f882001-02-23 17:55:21 +00004582 }
4583 if (cur != NULL) {
4584 if (cur->orig != NULL)
4585 xmlFree(orig);
4586 else
4587 cur->orig = orig;
4588 } else
4589 xmlFree(orig);
4590 }
Owen Taylor3473f882001-02-23 17:55:21 +00004591 if (value != NULL) xmlFree(value);
4592 if (URI != NULL) xmlFree(URI);
4593 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004594 }
4595}
4596
4597/**
4598 * xmlParseDefaultDecl:
4599 * @ctxt: an XML parser context
4600 * @value: Receive a possible fixed default value for the attribute
4601 *
4602 * Parse an attribute default declaration
4603 *
4604 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4605 *
4606 * [ VC: Required Attribute ]
4607 * if the default declaration is the keyword #REQUIRED, then the
4608 * attribute must be specified for all elements of the type in the
4609 * attribute-list declaration.
4610 *
4611 * [ VC: Attribute Default Legal ]
4612 * The declared default value must meet the lexical constraints of
4613 * the declared attribute type c.f. xmlValidateAttributeDecl()
4614 *
4615 * [ VC: Fixed Attribute Default ]
4616 * if an attribute has a default value declared with the #FIXED
4617 * keyword, instances of that attribute must match the default value.
4618 *
4619 * [ WFC: No < in Attribute Values ]
4620 * handled in xmlParseAttValue()
4621 *
4622 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4623 * or XML_ATTRIBUTE_FIXED.
4624 */
4625
4626int
4627xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4628 int val;
4629 xmlChar *ret;
4630
4631 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004632 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004633 SKIP(9);
4634 return(XML_ATTRIBUTE_REQUIRED);
4635 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004636 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004637 SKIP(8);
4638 return(XML_ATTRIBUTE_IMPLIED);
4639 }
4640 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004641 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004642 SKIP(6);
4643 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004644 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004645 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4646 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004647 }
4648 SKIP_BLANKS;
4649 }
4650 ret = xmlParseAttValue(ctxt);
4651 ctxt->instate = XML_PARSER_DTD;
4652 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004653 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004654 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004655 } else
4656 *value = ret;
4657 return(val);
4658}
4659
4660/**
4661 * xmlParseNotationType:
4662 * @ctxt: an XML parser context
4663 *
4664 * parse an Notation attribute type.
4665 *
4666 * Note: the leading 'NOTATION' S part has already being parsed...
4667 *
4668 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4669 *
4670 * [ VC: Notation Attributes ]
4671 * Values of this type must match one of the notation names included
4672 * in the declaration; all notation names in the declaration must be declared.
4673 *
4674 * Returns: the notation attribute tree built while parsing
4675 */
4676
4677xmlEnumerationPtr
4678xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004679 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004680 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4681
4682 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004683 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004684 return(NULL);
4685 }
4686 SHRINK;
4687 do {
4688 NEXT;
4689 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004690 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004692 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4693 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004694 return(ret);
4695 }
4696 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 if (cur == NULL) return(ret);
4698 if (last == NULL) ret = last = cur;
4699 else {
4700 last->next = cur;
4701 last = cur;
4702 }
4703 SKIP_BLANKS;
4704 } while (RAW == '|');
4705 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004706 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 if ((last != NULL) && (last != ret))
4708 xmlFreeEnumeration(last);
4709 return(ret);
4710 }
4711 NEXT;
4712 return(ret);
4713}
4714
4715/**
4716 * xmlParseEnumerationType:
4717 * @ctxt: an XML parser context
4718 *
4719 * parse an Enumeration attribute type.
4720 *
4721 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4722 *
4723 * [ VC: Enumeration ]
4724 * Values of this type must match one of the Nmtoken tokens in
4725 * the declaration
4726 *
4727 * Returns: the enumeration attribute tree built while parsing
4728 */
4729
4730xmlEnumerationPtr
4731xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4732 xmlChar *name;
4733 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4734
4735 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004736 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004737 return(NULL);
4738 }
4739 SHRINK;
4740 do {
4741 NEXT;
4742 SKIP_BLANKS;
4743 name = xmlParseNmtoken(ctxt);
4744 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004745 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004746 return(ret);
4747 }
4748 cur = xmlCreateEnumeration(name);
4749 xmlFree(name);
4750 if (cur == NULL) return(ret);
4751 if (last == NULL) ret = last = cur;
4752 else {
4753 last->next = cur;
4754 last = cur;
4755 }
4756 SKIP_BLANKS;
4757 } while (RAW == '|');
4758 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004759 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004760 return(ret);
4761 }
4762 NEXT;
4763 return(ret);
4764}
4765
4766/**
4767 * xmlParseEnumeratedType:
4768 * @ctxt: an XML parser context
4769 * @tree: the enumeration tree built while parsing
4770 *
4771 * parse an Enumerated attribute type.
4772 *
4773 * [57] EnumeratedType ::= NotationType | Enumeration
4774 *
4775 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4776 *
4777 *
4778 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4779 */
4780
4781int
4782xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004783 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004784 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004785 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4787 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004788 return(0);
4789 }
4790 SKIP_BLANKS;
4791 *tree = xmlParseNotationType(ctxt);
4792 if (*tree == NULL) return(0);
4793 return(XML_ATTRIBUTE_NOTATION);
4794 }
4795 *tree = xmlParseEnumerationType(ctxt);
4796 if (*tree == NULL) return(0);
4797 return(XML_ATTRIBUTE_ENUMERATION);
4798}
4799
4800/**
4801 * xmlParseAttributeType:
4802 * @ctxt: an XML parser context
4803 * @tree: the enumeration tree built while parsing
4804 *
4805 * parse the Attribute list def for an element
4806 *
4807 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4808 *
4809 * [55] StringType ::= 'CDATA'
4810 *
4811 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4812 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4813 *
4814 * Validity constraints for attribute values syntax are checked in
4815 * xmlValidateAttributeValue()
4816 *
4817 * [ VC: ID ]
4818 * Values of type ID must match the Name production. A name must not
4819 * appear more than once in an XML document as a value of this type;
4820 * i.e., ID values must uniquely identify the elements which bear them.
4821 *
4822 * [ VC: One ID per Element Type ]
4823 * No element type may have more than one ID attribute specified.
4824 *
4825 * [ VC: ID Attribute Default ]
4826 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4827 *
4828 * [ VC: IDREF ]
4829 * Values of type IDREF must match the Name production, and values
4830 * of type IDREFS must match Names; each IDREF Name must match the value
4831 * of an ID attribute on some element in the XML document; i.e. IDREF
4832 * values must match the value of some ID attribute.
4833 *
4834 * [ VC: Entity Name ]
4835 * Values of type ENTITY must match the Name production, values
4836 * of type ENTITIES must match Names; each Entity Name must match the
4837 * name of an unparsed entity declared in the DTD.
4838 *
4839 * [ VC: Name Token ]
4840 * Values of type NMTOKEN must match the Nmtoken production; values
4841 * of type NMTOKENS must match Nmtokens.
4842 *
4843 * Returns the attribute type
4844 */
4845int
4846xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4847 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004848 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004849 SKIP(5);
4850 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004851 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004852 SKIP(6);
4853 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004854 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004855 SKIP(5);
4856 return(XML_ATTRIBUTE_IDREF);
4857 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4858 SKIP(2);
4859 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004860 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004861 SKIP(6);
4862 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004863 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004864 SKIP(8);
4865 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004866 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004867 SKIP(8);
4868 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004869 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004870 SKIP(7);
4871 return(XML_ATTRIBUTE_NMTOKEN);
4872 }
4873 return(xmlParseEnumeratedType(ctxt, tree));
4874}
4875
4876/**
4877 * xmlParseAttributeListDecl:
4878 * @ctxt: an XML parser context
4879 *
4880 * : parse the Attribute list def for an element
4881 *
4882 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4883 *
4884 * [53] AttDef ::= S Name S AttType S DefaultDecl
4885 *
4886 */
4887void
4888xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004889 const xmlChar *elemName;
4890 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004891 xmlEnumerationPtr tree;
4892
Daniel Veillarda07050d2003-10-19 14:46:32 +00004893 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004894 xmlParserInputPtr input = ctxt->input;
4895
4896 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004897 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004899 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004900 }
4901 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004902 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004904 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4905 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004906 return;
4907 }
4908 SKIP_BLANKS;
4909 GROW;
4910 while (RAW != '>') {
4911 const xmlChar *check = CUR_PTR;
4912 int type;
4913 int def;
4914 xmlChar *defaultValue = NULL;
4915
4916 GROW;
4917 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004918 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4921 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004922 break;
4923 }
4924 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004925 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004926 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004927 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004928 break;
4929 }
4930 SKIP_BLANKS;
4931
4932 type = xmlParseAttributeType(ctxt, &tree);
4933 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004934 break;
4935 }
4936
4937 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004938 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004939 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4940 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004941 if (tree != NULL)
4942 xmlFreeEnumeration(tree);
4943 break;
4944 }
4945 SKIP_BLANKS;
4946
4947 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4948 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004949 if (defaultValue != NULL)
4950 xmlFree(defaultValue);
4951 if (tree != NULL)
4952 xmlFreeEnumeration(tree);
4953 break;
4954 }
4955
4956 GROW;
4957 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004958 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004960 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004961 if (defaultValue != NULL)
4962 xmlFree(defaultValue);
4963 if (tree != NULL)
4964 xmlFreeEnumeration(tree);
4965 break;
4966 }
4967 SKIP_BLANKS;
4968 }
4969 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004970 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4971 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004972 if (defaultValue != NULL)
4973 xmlFree(defaultValue);
4974 if (tree != NULL)
4975 xmlFreeEnumeration(tree);
4976 break;
4977 }
4978 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4979 (ctxt->sax->attributeDecl != NULL))
4980 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4981 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004982 else if (tree != NULL)
4983 xmlFreeEnumeration(tree);
4984
4985 if ((ctxt->sax2) && (defaultValue != NULL) &&
4986 (def != XML_ATTRIBUTE_IMPLIED) &&
4987 (def != XML_ATTRIBUTE_REQUIRED)) {
4988 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4989 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004990 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4991 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4992 }
Owen Taylor3473f882001-02-23 17:55:21 +00004993 if (defaultValue != NULL)
4994 xmlFree(defaultValue);
4995 GROW;
4996 }
4997 if (RAW == '>') {
4998 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004999 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5000 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005001 }
5002 NEXT;
5003 }
Owen Taylor3473f882001-02-23 17:55:21 +00005004 }
5005}
5006
5007/**
5008 * xmlParseElementMixedContentDecl:
5009 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005010 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005011 *
5012 * parse the declaration for a Mixed Element content
5013 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5014 *
5015 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5016 * '(' S? '#PCDATA' S? ')'
5017 *
5018 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5019 *
5020 * [ VC: No Duplicate Types ]
5021 * The same name must not appear more than once in a single
5022 * mixed-content declaration.
5023 *
5024 * returns: the list of the xmlElementContentPtr describing the element choices
5025 */
5026xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005027xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005028 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005029 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005030
5031 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005032 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005033 SKIP(7);
5034 SKIP_BLANKS;
5035 SHRINK;
5036 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005037 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005038 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5039"Element content declaration doesn't start and stop in the same entity\n",
5040 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005041 }
Owen Taylor3473f882001-02-23 17:55:21 +00005042 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005043 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005044 if (RAW == '*') {
5045 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5046 NEXT;
5047 }
5048 return(ret);
5049 }
5050 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005051 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 if (ret == NULL) return(NULL);
5053 }
5054 while (RAW == '|') {
5055 NEXT;
5056 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005057 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005058 if (ret == NULL) return(NULL);
5059 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005060 if (cur != NULL)
5061 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005062 cur = ret;
5063 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005064 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005065 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005066 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005067 if (n->c1 != NULL)
5068 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005069 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005070 if (n != NULL)
5071 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005073 }
5074 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005075 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005077 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005078 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005079 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005080 return(NULL);
5081 }
5082 SKIP_BLANKS;
5083 GROW;
5084 }
5085 if ((RAW == ')') && (NXT(1) == '*')) {
5086 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005087 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005088 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005089 if (cur->c2 != NULL)
5090 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 }
5092 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005093 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005094 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5095"Element content declaration doesn't start and stop in the same entity\n",
5096 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005097 }
Owen Taylor3473f882001-02-23 17:55:21 +00005098 SKIP(2);
5099 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005100 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005101 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 return(NULL);
5103 }
5104
5105 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005106 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005107 }
5108 return(ret);
5109}
5110
5111/**
5112 * xmlParseElementChildrenContentDecl:
5113 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005114 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005115 *
5116 * parse the declaration for a Mixed Element content
5117 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5118 *
5119 *
5120 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5121 *
5122 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5123 *
5124 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5125 *
5126 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5127 *
5128 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5129 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005130 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005131 * opening or closing parentheses in a choice, seq, or Mixed
5132 * construct is contained in the replacement text for a parameter
5133 * entity, both must be contained in the same replacement text. For
5134 * interoperability, if a parameter-entity reference appears in a
5135 * choice, seq, or Mixed construct, its replacement text should not
5136 * be empty, and neither the first nor last non-blank character of
5137 * the replacement text should be a connector (| or ,).
5138 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005139 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005140 * hierarchy.
5141 */
5142xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005144 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005145 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 xmlChar type = 0;
5147
5148 SKIP_BLANKS;
5149 GROW;
5150 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005151 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005152
Owen Taylor3473f882001-02-23 17:55:21 +00005153 /* Recurse on first child */
5154 NEXT;
5155 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005156 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 SKIP_BLANKS;
5158 GROW;
5159 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005160 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005161 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005162 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 return(NULL);
5164 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005165 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005166 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005167 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005168 return(NULL);
5169 }
Owen Taylor3473f882001-02-23 17:55:21 +00005170 GROW;
5171 if (RAW == '?') {
5172 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5173 NEXT;
5174 } else if (RAW == '*') {
5175 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5176 NEXT;
5177 } else if (RAW == '+') {
5178 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5179 NEXT;
5180 } else {
5181 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 GROW;
5184 }
5185 SKIP_BLANKS;
5186 SHRINK;
5187 while (RAW != ')') {
5188 /*
5189 * Each loop we parse one separator and one element.
5190 */
5191 if (RAW == ',') {
5192 if (type == 0) type = CUR;
5193
5194 /*
5195 * Detect "Name | Name , Name" error
5196 */
5197 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005198 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005199 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005200 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005201 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005202 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005203 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005204 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005205 return(NULL);
5206 }
5207 NEXT;
5208
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005209 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005211 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005212 xmlFreeDocElementContent(ctxt->myDoc, last);
5213 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005214 return(NULL);
5215 }
5216 if (last == NULL) {
5217 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005218 if (ret != NULL)
5219 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005220 ret = cur = op;
5221 } else {
5222 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005223 if (op != NULL)
5224 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005225 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005226 if (last != NULL)
5227 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005228 cur =op;
5229 last = NULL;
5230 }
5231 } else if (RAW == '|') {
5232 if (type == 0) type = CUR;
5233
5234 /*
5235 * Detect "Name , Name | Name" error
5236 */
5237 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005238 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005239 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005240 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005241 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005242 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005243 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 return(NULL);
5246 }
5247 NEXT;
5248
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005249 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005251 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005252 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005253 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 return(NULL);
5256 }
5257 if (last == NULL) {
5258 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005259 if (ret != NULL)
5260 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 ret = cur = op;
5262 } else {
5263 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005264 if (op != NULL)
5265 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005267 if (last != NULL)
5268 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005269 cur =op;
5270 last = NULL;
5271 }
5272 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005273 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005274 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005275 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005276 return(NULL);
5277 }
5278 GROW;
5279 SKIP_BLANKS;
5280 GROW;
5281 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005282 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005283 /* Recurse on second child */
5284 NEXT;
5285 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005286 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005287 SKIP_BLANKS;
5288 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005289 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005290 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005291 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005293 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005294 return(NULL);
5295 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005296 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005297 if (RAW == '?') {
5298 last->ocur = XML_ELEMENT_CONTENT_OPT;
5299 NEXT;
5300 } else if (RAW == '*') {
5301 last->ocur = XML_ELEMENT_CONTENT_MULT;
5302 NEXT;
5303 } else if (RAW == '+') {
5304 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5305 NEXT;
5306 } else {
5307 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5308 }
5309 }
5310 SKIP_BLANKS;
5311 GROW;
5312 }
5313 if ((cur != NULL) && (last != NULL)) {
5314 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005315 if (last != NULL)
5316 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005318 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005319 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5320"Element content declaration doesn't start and stop in the same entity\n",
5321 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005322 }
Owen Taylor3473f882001-02-23 17:55:21 +00005323 NEXT;
5324 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005325 if (ret != NULL) {
5326 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5327 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5328 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5329 else
5330 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5331 }
Owen Taylor3473f882001-02-23 17:55:21 +00005332 NEXT;
5333 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005334 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005335 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005336 cur = ret;
5337 /*
5338 * Some normalization:
5339 * (a | b* | c?)* == (a | b | c)*
5340 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005341 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005342 if ((cur->c1 != NULL) &&
5343 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5344 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5345 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5346 if ((cur->c2 != NULL) &&
5347 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5348 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5349 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5350 cur = cur->c2;
5351 }
5352 }
Owen Taylor3473f882001-02-23 17:55:21 +00005353 NEXT;
5354 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005355 if (ret != NULL) {
5356 int found = 0;
5357
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005358 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5359 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5360 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005361 else
5362 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005363 /*
5364 * Some normalization:
5365 * (a | b*)+ == (a | b)*
5366 * (a | b?)+ == (a | b)*
5367 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005368 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005369 if ((cur->c1 != NULL) &&
5370 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5371 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5372 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5373 found = 1;
5374 }
5375 if ((cur->c2 != NULL) &&
5376 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5377 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5378 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5379 found = 1;
5380 }
5381 cur = cur->c2;
5382 }
5383 if (found)
5384 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5385 }
Owen Taylor3473f882001-02-23 17:55:21 +00005386 NEXT;
5387 }
5388 return(ret);
5389}
5390
5391/**
5392 * xmlParseElementContentDecl:
5393 * @ctxt: an XML parser context
5394 * @name: the name of the element being defined.
5395 * @result: the Element Content pointer will be stored here if any
5396 *
5397 * parse the declaration for an Element content either Mixed or Children,
5398 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5399 *
5400 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5401 *
5402 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5403 */
5404
5405int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005406xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005407 xmlElementContentPtr *result) {
5408
5409 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005410 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005411 int res;
5412
5413 *result = NULL;
5414
5415 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005416 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005417 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 return(-1);
5419 }
5420 NEXT;
5421 GROW;
5422 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005423 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005424 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005425 res = XML_ELEMENT_TYPE_MIXED;
5426 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005427 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005428 res = XML_ELEMENT_TYPE_ELEMENT;
5429 }
Owen Taylor3473f882001-02-23 17:55:21 +00005430 SKIP_BLANKS;
5431 *result = tree;
5432 return(res);
5433}
5434
5435/**
5436 * xmlParseElementDecl:
5437 * @ctxt: an XML parser context
5438 *
5439 * parse an Element declaration.
5440 *
5441 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5442 *
5443 * [ VC: Unique Element Type Declaration ]
5444 * No element type may be declared more than once
5445 *
5446 * Returns the type of the element, or -1 in case of error
5447 */
5448int
5449xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005450 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005451 int ret = -1;
5452 xmlElementContentPtr content = NULL;
5453
Daniel Veillard4c778d82005-01-23 17:37:44 +00005454 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005455 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005456 xmlParserInputPtr input = ctxt->input;
5457
5458 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005459 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005460 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5461 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005462 }
5463 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005464 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005465 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005466 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5467 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005468 return(-1);
5469 }
5470 while ((RAW == 0) && (ctxt->inputNr > 1))
5471 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5474 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005475 }
5476 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005477 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005478 SKIP(5);
5479 /*
5480 * Element must always be empty.
5481 */
5482 ret = XML_ELEMENT_TYPE_EMPTY;
5483 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5484 (NXT(2) == 'Y')) {
5485 SKIP(3);
5486 /*
5487 * Element is a generic container.
5488 */
5489 ret = XML_ELEMENT_TYPE_ANY;
5490 } else if (RAW == '(') {
5491 ret = xmlParseElementContentDecl(ctxt, name, &content);
5492 } else {
5493 /*
5494 * [ WFC: PEs in Internal Subset ] error handling.
5495 */
5496 if ((RAW == '%') && (ctxt->external == 0) &&
5497 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005498 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005499 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005500 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005501 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005502 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5503 }
Owen Taylor3473f882001-02-23 17:55:21 +00005504 return(-1);
5505 }
5506
5507 SKIP_BLANKS;
5508 /*
5509 * Pop-up of finished entities.
5510 */
5511 while ((RAW == 0) && (ctxt->inputNr > 1))
5512 xmlPopInput(ctxt);
5513 SKIP_BLANKS;
5514
5515 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005516 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005517 if (content != NULL) {
5518 xmlFreeDocElementContent(ctxt->myDoc, content);
5519 }
Owen Taylor3473f882001-02-23 17:55:21 +00005520 } else {
5521 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005522 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5523 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005524 }
5525
5526 NEXT;
5527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005528 (ctxt->sax->elementDecl != NULL)) {
5529 if (content != NULL)
5530 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005531 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5532 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005533 if ((content != NULL) && (content->parent == NULL)) {
5534 /*
5535 * this is a trick: if xmlAddElementDecl is called,
5536 * instead of copying the full tree it is plugged directly
5537 * if called from the parser. Avoid duplicating the
5538 * interfaces or change the API/ABI
5539 */
5540 xmlFreeDocElementContent(ctxt->myDoc, content);
5541 }
5542 } else if (content != NULL) {
5543 xmlFreeDocElementContent(ctxt->myDoc, content);
5544 }
Owen Taylor3473f882001-02-23 17:55:21 +00005545 }
Owen Taylor3473f882001-02-23 17:55:21 +00005546 }
5547 return(ret);
5548}
5549
5550/**
Owen Taylor3473f882001-02-23 17:55:21 +00005551 * xmlParseConditionalSections
5552 * @ctxt: an XML parser context
5553 *
5554 * [61] conditionalSect ::= includeSect | ignoreSect
5555 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5556 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5557 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5558 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5559 */
5560
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005561static void
Owen Taylor3473f882001-02-23 17:55:21 +00005562xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5563 SKIP(3);
5564 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005565 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005566 SKIP(7);
5567 SKIP_BLANKS;
5568 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005569 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 } else {
5571 NEXT;
5572 }
5573 if (xmlParserDebugEntities) {
5574 if ((ctxt->input != NULL) && (ctxt->input->filename))
5575 xmlGenericError(xmlGenericErrorContext,
5576 "%s(%d): ", ctxt->input->filename,
5577 ctxt->input->line);
5578 xmlGenericError(xmlGenericErrorContext,
5579 "Entering INCLUDE Conditional Section\n");
5580 }
5581
5582 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5583 (NXT(2) != '>'))) {
5584 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005585 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005586
5587 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5588 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005589 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005590 NEXT;
5591 } else if (RAW == '%') {
5592 xmlParsePEReference(ctxt);
5593 } else
5594 xmlParseMarkupDecl(ctxt);
5595
5596 /*
5597 * Pop-up of finished entities.
5598 */
5599 while ((RAW == 0) && (ctxt->inputNr > 1))
5600 xmlPopInput(ctxt);
5601
Daniel Veillardfdc91562002-07-01 21:52:03 +00005602 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005603 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005604 break;
5605 }
5606 }
5607 if (xmlParserDebugEntities) {
5608 if ((ctxt->input != NULL) && (ctxt->input->filename))
5609 xmlGenericError(xmlGenericErrorContext,
5610 "%s(%d): ", ctxt->input->filename,
5611 ctxt->input->line);
5612 xmlGenericError(xmlGenericErrorContext,
5613 "Leaving INCLUDE Conditional Section\n");
5614 }
5615
Daniel Veillarda07050d2003-10-19 14:46:32 +00005616 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005617 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005618 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005619 int depth = 0;
5620
5621 SKIP(6);
5622 SKIP_BLANKS;
5623 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005624 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005625 } else {
5626 NEXT;
5627 }
5628 if (xmlParserDebugEntities) {
5629 if ((ctxt->input != NULL) && (ctxt->input->filename))
5630 xmlGenericError(xmlGenericErrorContext,
5631 "%s(%d): ", ctxt->input->filename,
5632 ctxt->input->line);
5633 xmlGenericError(xmlGenericErrorContext,
5634 "Entering IGNORE Conditional Section\n");
5635 }
5636
5637 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005638 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005639 * But disable SAX event generating DTD building in the meantime
5640 */
5641 state = ctxt->disableSAX;
5642 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005643 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005644 ctxt->instate = XML_PARSER_IGNORE;
5645
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005646 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005647 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5648 depth++;
5649 SKIP(3);
5650 continue;
5651 }
5652 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5653 if (--depth >= 0) SKIP(3);
5654 continue;
5655 }
5656 NEXT;
5657 continue;
5658 }
5659
5660 ctxt->disableSAX = state;
5661 ctxt->instate = instate;
5662
5663 if (xmlParserDebugEntities) {
5664 if ((ctxt->input != NULL) && (ctxt->input->filename))
5665 xmlGenericError(xmlGenericErrorContext,
5666 "%s(%d): ", ctxt->input->filename,
5667 ctxt->input->line);
5668 xmlGenericError(xmlGenericErrorContext,
5669 "Leaving IGNORE Conditional Section\n");
5670 }
5671
5672 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005673 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005674 }
5675
5676 if (RAW == 0)
5677 SHRINK;
5678
5679 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005680 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005681 } else {
5682 SKIP(3);
5683 }
5684}
5685
5686/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005687 * xmlParseMarkupDecl:
5688 * @ctxt: an XML parser context
5689 *
5690 * parse Markup declarations
5691 *
5692 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5693 * NotationDecl | PI | Comment
5694 *
5695 * [ VC: Proper Declaration/PE Nesting ]
5696 * Parameter-entity replacement text must be properly nested with
5697 * markup declarations. That is to say, if either the first character
5698 * or the last character of a markup declaration (markupdecl above) is
5699 * contained in the replacement text for a parameter-entity reference,
5700 * both must be contained in the same replacement text.
5701 *
5702 * [ WFC: PEs in Internal Subset ]
5703 * In the internal DTD subset, parameter-entity references can occur
5704 * only where markup declarations can occur, not within markup declarations.
5705 * (This does not apply to references that occur in external parameter
5706 * entities or to the external subset.)
5707 */
5708void
5709xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5710 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005711 if (CUR == '<') {
5712 if (NXT(1) == '!') {
5713 switch (NXT(2)) {
5714 case 'E':
5715 if (NXT(3) == 'L')
5716 xmlParseElementDecl(ctxt);
5717 else if (NXT(3) == 'N')
5718 xmlParseEntityDecl(ctxt);
5719 break;
5720 case 'A':
5721 xmlParseAttributeListDecl(ctxt);
5722 break;
5723 case 'N':
5724 xmlParseNotationDecl(ctxt);
5725 break;
5726 case '-':
5727 xmlParseComment(ctxt);
5728 break;
5729 default:
5730 /* there is an error but it will be detected later */
5731 break;
5732 }
5733 } else if (NXT(1) == '?') {
5734 xmlParsePI(ctxt);
5735 }
5736 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005737 /*
5738 * This is only for internal subset. On external entities,
5739 * the replacement is done before parsing stage
5740 */
5741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5742 xmlParsePEReference(ctxt);
5743
5744 /*
5745 * Conditional sections are allowed from entities included
5746 * by PE References in the internal subset.
5747 */
5748 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5749 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5750 xmlParseConditionalSections(ctxt);
5751 }
5752 }
5753
5754 ctxt->instate = XML_PARSER_DTD;
5755}
5756
5757/**
5758 * xmlParseTextDecl:
5759 * @ctxt: an XML parser context
5760 *
5761 * parse an XML declaration header for external entities
5762 *
5763 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5764 *
5765 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5766 */
5767
5768void
5769xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5770 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005771 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005772
5773 /*
5774 * We know that '<?xml' is here.
5775 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005776 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005777 SKIP(5);
5778 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005779 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005780 return;
5781 }
5782
William M. Brack76e95df2003-10-18 16:20:14 +00005783 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5785 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005786 }
5787 SKIP_BLANKS;
5788
5789 /*
5790 * We may have the VersionInfo here.
5791 */
5792 version = xmlParseVersionInfo(ctxt);
5793 if (version == NULL)
5794 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005795 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005796 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5798 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005799 }
5800 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005801 ctxt->input->version = version;
5802
5803 /*
5804 * We must have the encoding declaration
5805 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005806 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005807 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5808 /*
5809 * The XML REC instructs us to stop parsing right here
5810 */
5811 return;
5812 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005813 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5814 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5815 "Missing encoding in text declaration\n");
5816 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005817
5818 SKIP_BLANKS;
5819 if ((RAW == '?') && (NXT(1) == '>')) {
5820 SKIP(2);
5821 } else if (RAW == '>') {
5822 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005823 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005824 NEXT;
5825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005826 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005827 MOVETO_ENDTAG(CUR_PTR);
5828 NEXT;
5829 }
5830}
5831
5832/**
Owen Taylor3473f882001-02-23 17:55:21 +00005833 * xmlParseExternalSubset:
5834 * @ctxt: an XML parser context
5835 * @ExternalID: the external identifier
5836 * @SystemID: the system identifier (or URL)
5837 *
5838 * parse Markup declarations from an external subset
5839 *
5840 * [30] extSubset ::= textDecl? extSubsetDecl
5841 *
5842 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5843 */
5844void
5845xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5846 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005847 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005848 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005849 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005850 xmlParseTextDecl(ctxt);
5851 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5852 /*
5853 * The XML REC instructs us to stop parsing right here
5854 */
5855 ctxt->instate = XML_PARSER_EOF;
5856 return;
5857 }
5858 }
5859 if (ctxt->myDoc == NULL) {
5860 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5861 }
5862 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5863 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5864
5865 ctxt->instate = XML_PARSER_DTD;
5866 ctxt->external = 1;
5867 while (((RAW == '<') && (NXT(1) == '?')) ||
5868 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005869 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005870 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005871 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005872
5873 GROW;
5874 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5875 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005876 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005877 NEXT;
5878 } else if (RAW == '%') {
5879 xmlParsePEReference(ctxt);
5880 } else
5881 xmlParseMarkupDecl(ctxt);
5882
5883 /*
5884 * Pop-up of finished entities.
5885 */
5886 while ((RAW == 0) && (ctxt->inputNr > 1))
5887 xmlPopInput(ctxt);
5888
Daniel Veillardfdc91562002-07-01 21:52:03 +00005889 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005890 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 break;
5892 }
5893 }
5894
5895 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005896 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 }
5898
5899}
5900
5901/**
5902 * xmlParseReference:
5903 * @ctxt: an XML parser context
5904 *
5905 * parse and handle entity references in content, depending on the SAX
5906 * interface, this may end-up in a call to character() if this is a
5907 * CharRef, a predefined entity, if there is no reference() callback.
5908 * or if the parser was asked to switch to that mode.
5909 *
5910 * [67] Reference ::= EntityRef | CharRef
5911 */
5912void
5913xmlParseReference(xmlParserCtxtPtr ctxt) {
5914 xmlEntityPtr ent;
5915 xmlChar *val;
5916 if (RAW != '&') return;
5917
5918 if (NXT(1) == '#') {
5919 int i = 0;
5920 xmlChar out[10];
5921 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005922 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005923
5924 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5925 /*
5926 * So we are using non-UTF-8 buffers
5927 * Check that the char fit on 8bits, if not
5928 * generate a CharRef.
5929 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005930 if (value <= 0xFF) {
5931 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005932 out[1] = 0;
5933 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5934 (!ctxt->disableSAX))
5935 ctxt->sax->characters(ctxt->userData, out, 1);
5936 } else {
5937 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005938 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005940 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005941 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5942 (!ctxt->disableSAX))
5943 ctxt->sax->reference(ctxt->userData, out);
5944 }
5945 } else {
5946 /*
5947 * Just encode the value in UTF-8
5948 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005949 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005950 out[i] = 0;
5951 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5952 (!ctxt->disableSAX))
5953 ctxt->sax->characters(ctxt->userData, out, i);
5954 }
5955 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005956 int was_checked;
5957
Owen Taylor3473f882001-02-23 17:55:21 +00005958 ent = xmlParseEntityRef(ctxt);
5959 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005960 if (!ctxt->wellFormed)
5961 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005962 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 if ((ent->name != NULL) &&
5964 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5965 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005966 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005967
5968
5969 /*
5970 * The first reference to the entity trigger a parsing phase
5971 * where the ent->children is filled with the result from
5972 * the parsing.
5973 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005974 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005975 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005976
Owen Taylor3473f882001-02-23 17:55:21 +00005977 value = ent->content;
5978
5979 /*
5980 * Check that this entity is well formed
5981 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005982 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005983 (value[1] == 0) && (value[0] == '<') &&
5984 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5985 /*
5986 * DONE: get definite answer on this !!!
5987 * Lots of entity decls are used to declare a single
5988 * char
5989 * <!ENTITY lt "<">
5990 * Which seems to be valid since
5991 * 2.4: The ampersand character (&) and the left angle
5992 * bracket (<) may appear in their literal form only
5993 * when used ... They are also legal within the literal
5994 * entity value of an internal entity declaration;i
5995 * see "4.3.2 Well-Formed Parsed Entities".
5996 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5997 * Looking at the OASIS test suite and James Clark
5998 * tests, this is broken. However the XML REC uses
5999 * it. Is the XML REC not well-formed ????
6000 * This is a hack to avoid this problem
6001 *
6002 * ANSWER: since lt gt amp .. are already defined,
6003 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006004 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006005 * is lousy but acceptable.
6006 */
6007 list = xmlNewDocText(ctxt->myDoc, value);
6008 if (list != NULL) {
6009 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6010 (ent->children == NULL)) {
6011 ent->children = list;
6012 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006013 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006014 list->parent = (xmlNodePtr) ent;
6015 } else {
6016 xmlFreeNodeList(list);
6017 }
6018 } else if (list != NULL) {
6019 xmlFreeNodeList(list);
6020 }
6021 } else {
6022 /*
6023 * 4.3.2: An internal general parsed entity is well-formed
6024 * if its replacement text matches the production labeled
6025 * content.
6026 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006027
6028 void *user_data;
6029 /*
6030 * This is a bit hackish but this seems the best
6031 * way to make sure both SAX and DOM entity support
6032 * behaves okay.
6033 */
6034 if (ctxt->userData == ctxt)
6035 user_data = NULL;
6036 else
6037 user_data = ctxt->userData;
6038
Owen Taylor3473f882001-02-23 17:55:21 +00006039 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6040 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006041 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6042 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006043 ctxt->depth--;
6044 } else if (ent->etype ==
6045 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6046 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006047 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006048 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006049 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006050 ctxt->depth--;
6051 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006052 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006053 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6054 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006055 }
6056 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006057 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006058 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006059 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006060 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6061 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006062 (ent->children == NULL)) {
6063 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006064 if (ctxt->replaceEntities) {
6065 /*
6066 * Prune it directly in the generated document
6067 * except for single text nodes.
6068 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006069 if (((list->type == XML_TEXT_NODE) &&
6070 (list->next == NULL)) ||
6071 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006072 list->parent = (xmlNodePtr) ent;
6073 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006074 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006075 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006076 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006077 while (list != NULL) {
6078 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006079 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006080 if (list->next == NULL)
6081 ent->last = list;
6082 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006083 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006084 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006085#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006086 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6087 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006088#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006089 }
6090 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006091 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006092 while (list != NULL) {
6093 list->parent = (xmlNodePtr) ent;
6094 if (list->next == NULL)
6095 ent->last = list;
6096 list = list->next;
6097 }
Owen Taylor3473f882001-02-23 17:55:21 +00006098 }
6099 } else {
6100 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006101 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006102 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006103 } else if ((ret != XML_ERR_OK) &&
6104 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006105 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6106 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006107 } else if (list != NULL) {
6108 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006109 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006110 }
6111 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006112 ent->checked = 1;
6113 }
6114
6115 if (ent->children == NULL) {
6116 /*
6117 * Probably running in SAX mode and the callbacks don't
6118 * build the entity content. So unless we already went
6119 * though parsing for first checking go though the entity
6120 * content to generate callbacks associated to the entity
6121 */
6122 if (was_checked == 1) {
6123 void *user_data;
6124 /*
6125 * This is a bit hackish but this seems the best
6126 * way to make sure both SAX and DOM entity support
6127 * behaves okay.
6128 */
6129 if (ctxt->userData == ctxt)
6130 user_data = NULL;
6131 else
6132 user_data = ctxt->userData;
6133
6134 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6135 ctxt->depth++;
6136 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6137 ent->content, user_data, NULL);
6138 ctxt->depth--;
6139 } else if (ent->etype ==
6140 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6141 ctxt->depth++;
6142 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6143 ctxt->sax, user_data, ctxt->depth,
6144 ent->URI, ent->ExternalID, NULL);
6145 ctxt->depth--;
6146 } else {
6147 ret = XML_ERR_ENTITY_PE_INTERNAL;
6148 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6149 "invalid entity type found\n", NULL);
6150 }
6151 if (ret == XML_ERR_ENTITY_LOOP) {
6152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6153 return;
6154 }
6155 }
6156 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6157 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6158 /*
6159 * Entity reference callback comes second, it's somewhat
6160 * superfluous but a compatibility to historical behaviour
6161 */
6162 ctxt->sax->reference(ctxt->userData, ent->name);
6163 }
6164 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
6166 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006167 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006168 /*
6169 * Create a node.
6170 */
6171 ctxt->sax->reference(ctxt->userData, ent->name);
6172 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006173 }
6174 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006175 /*
6176 * There is a problem on the handling of _private for entities
6177 * (bug 155816): Should we copy the content of the field from
6178 * the entity (possibly overwriting some value set by the user
6179 * when a copy is created), should we leave it alone, or should
6180 * we try to take care of different situations? The problem
6181 * is exacerbated by the usage of this field by the xmlReader.
6182 * To fix this bug, we look at _private on the created node
6183 * and, if it's NULL, we copy in whatever was in the entity.
6184 * If it's not NULL we leave it alone. This is somewhat of a
6185 * hack - maybe we should have further tests to determine
6186 * what to do.
6187 */
Owen Taylor3473f882001-02-23 17:55:21 +00006188 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6189 /*
6190 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006191 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006192 * In the first occurrence list contains the replacement.
6193 * progressive == 2 means we are operating on the Reader
6194 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006195 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006196 if (((list == NULL) && (ent->owner == 0)) ||
6197 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006198 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006199
6200 /*
6201 * when operating on a reader, the entities definitions
6202 * are always owning the entities subtree.
6203 if (ctxt->parseMode == XML_PARSE_READER)
6204 ent->owner = 1;
6205 */
6206
Daniel Veillard62f313b2001-07-04 19:49:14 +00006207 cur = ent->children;
6208 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006209 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006210 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006211 if (nw->_private == NULL)
6212 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006213 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006214 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006215 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006216 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006217 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006218 if (cur == ent->last) {
6219 /*
6220 * needed to detect some strange empty
6221 * node cases in the reader tests
6222 */
6223 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006224 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006225 (nw->type == XML_ELEMENT_NODE) &&
6226 (nw->children == NULL))
6227 nw->extra = 1;
6228
Daniel Veillard62f313b2001-07-04 19:49:14 +00006229 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006230 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006231 cur = cur->next;
6232 }
Daniel Veillard81273902003-09-30 00:43:48 +00006233#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006234 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006235 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006236#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006237 } else if (list == NULL) {
6238 xmlNodePtr nw = NULL, cur, next, last,
6239 firstChild = NULL;
6240 /*
6241 * Copy the entity child list and make it the new
6242 * entity child list. The goal is to make sure any
6243 * ID or REF referenced will be the one from the
6244 * document content and not the entity copy.
6245 */
6246 cur = ent->children;
6247 ent->children = NULL;
6248 last = ent->last;
6249 ent->last = NULL;
6250 while (cur != NULL) {
6251 next = cur->next;
6252 cur->next = NULL;
6253 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006254 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006255 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006256 if (nw->_private == NULL)
6257 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006258 if (firstChild == NULL){
6259 firstChild = cur;
6260 }
6261 xmlAddChild((xmlNodePtr) ent, nw);
6262 xmlAddChild(ctxt->node, cur);
6263 }
6264 if (cur == last)
6265 break;
6266 cur = next;
6267 }
6268 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006269#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006270 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6271 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006272#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006273 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006274 const xmlChar *nbktext;
6275
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 /*
6277 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006278 * node with a possible previous text one which
6279 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006280 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006281 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6282 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006283 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006284 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006285 if ((ent->last != ent->children) &&
6286 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006287 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006288 xmlAddChildList(ctxt->node, ent->children);
6289 }
6290
Owen Taylor3473f882001-02-23 17:55:21 +00006291 /*
6292 * This is to avoid a nasty side effect, see
6293 * characters() in SAX.c
6294 */
6295 ctxt->nodemem = 0;
6296 ctxt->nodelen = 0;
6297 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006298 }
6299 }
6300 } else {
6301 val = ent->content;
6302 if (val == NULL) return;
6303 /*
6304 * inline the entity.
6305 */
6306 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6307 (!ctxt->disableSAX))
6308 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6309 }
6310 }
6311}
6312
6313/**
6314 * xmlParseEntityRef:
6315 * @ctxt: an XML parser context
6316 *
6317 * parse ENTITY references declarations
6318 *
6319 * [68] EntityRef ::= '&' Name ';'
6320 *
6321 * [ WFC: Entity Declared ]
6322 * In a document without any DTD, a document with only an internal DTD
6323 * subset which contains no parameter entity references, or a document
6324 * with "standalone='yes'", the Name given in the entity reference
6325 * must match that in an entity declaration, except that well-formed
6326 * documents need not declare any of the following entities: amp, lt,
6327 * gt, apos, quot. The declaration of a parameter entity must precede
6328 * any reference to it. Similarly, the declaration of a general entity
6329 * must precede any reference to it which appears in a default value in an
6330 * attribute-list declaration. Note that if entities are declared in the
6331 * external subset or in external parameter entities, a non-validating
6332 * processor is not obligated to read and process their declarations;
6333 * for such documents, the rule that an entity must be declared is a
6334 * well-formedness constraint only if standalone='yes'.
6335 *
6336 * [ WFC: Parsed Entity ]
6337 * An entity reference must not contain the name of an unparsed entity
6338 *
6339 * Returns the xmlEntityPtr if found, or NULL otherwise.
6340 */
6341xmlEntityPtr
6342xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006343 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006344 xmlEntityPtr ent = NULL;
6345
6346 GROW;
6347
6348 if (RAW == '&') {
6349 NEXT;
6350 name = xmlParseName(ctxt);
6351 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006352 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6353 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006354 } else {
6355 if (RAW == ';') {
6356 NEXT;
6357 /*
6358 * Ask first SAX for entity resolution, otherwise try the
6359 * predefined set.
6360 */
6361 if (ctxt->sax != NULL) {
6362 if (ctxt->sax->getEntity != NULL)
6363 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006364 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006365 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006366 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6367 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006368 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006369 }
Owen Taylor3473f882001-02-23 17:55:21 +00006370 }
6371 /*
6372 * [ WFC: Entity Declared ]
6373 * In a document without any DTD, a document with only an
6374 * internal DTD subset which contains no parameter entity
6375 * references, or a document with "standalone='yes'", the
6376 * Name given in the entity reference must match that in an
6377 * entity declaration, except that well-formed documents
6378 * need not declare any of the following entities: amp, lt,
6379 * gt, apos, quot.
6380 * The declaration of a parameter entity must precede any
6381 * reference to it.
6382 * Similarly, the declaration of a general entity must
6383 * precede any reference to it which appears in a default
6384 * value in an attribute-list declaration. Note that if
6385 * entities are declared in the external subset or in
6386 * external parameter entities, a non-validating processor
6387 * is not obligated to read and process their declarations;
6388 * for such documents, the rule that an entity must be
6389 * declared is a well-formedness constraint only if
6390 * standalone='yes'.
6391 */
6392 if (ent == NULL) {
6393 if ((ctxt->standalone == 1) ||
6394 ((ctxt->hasExternalSubset == 0) &&
6395 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006396 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006397 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006398 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006399 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006400 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006401 if ((ctxt->inSubset == 0) &&
6402 (ctxt->sax != NULL) &&
6403 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006404 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006405 }
Owen Taylor3473f882001-02-23 17:55:21 +00006406 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006407 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006408 }
6409
6410 /*
6411 * [ WFC: Parsed Entity ]
6412 * An entity reference must not contain the name of an
6413 * unparsed entity
6414 */
6415 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006416 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006417 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006418 }
6419
6420 /*
6421 * [ WFC: No External Entity References ]
6422 * Attribute values cannot contain direct or indirect
6423 * entity references to external entities.
6424 */
6425 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6426 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6428 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006429 }
6430 /*
6431 * [ WFC: No < in Attribute Values ]
6432 * The replacement text of any entity referred to directly or
6433 * indirectly in an attribute value (other than "&lt;") must
6434 * not contain a <.
6435 */
6436 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6437 (ent != NULL) &&
6438 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6439 (ent->content != NULL) &&
6440 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006441 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006442 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006443 }
6444
6445 /*
6446 * Internal check, no parameter entities here ...
6447 */
6448 else {
6449 switch (ent->etype) {
6450 case XML_INTERNAL_PARAMETER_ENTITY:
6451 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006452 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6453 "Attempt to reference the parameter entity '%s'\n",
6454 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006455 break;
6456 default:
6457 break;
6458 }
6459 }
6460
6461 /*
6462 * [ WFC: No Recursion ]
6463 * A parsed entity must not contain a recursive reference
6464 * to itself, either directly or indirectly.
6465 * Done somewhere else
6466 */
6467
6468 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006469 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006470 }
Owen Taylor3473f882001-02-23 17:55:21 +00006471 }
6472 }
6473 return(ent);
6474}
6475
6476/**
6477 * xmlParseStringEntityRef:
6478 * @ctxt: an XML parser context
6479 * @str: a pointer to an index in the string
6480 *
6481 * parse ENTITY references declarations, but this version parses it from
6482 * a string value.
6483 *
6484 * [68] EntityRef ::= '&' Name ';'
6485 *
6486 * [ WFC: Entity Declared ]
6487 * In a document without any DTD, a document with only an internal DTD
6488 * subset which contains no parameter entity references, or a document
6489 * with "standalone='yes'", the Name given in the entity reference
6490 * must match that in an entity declaration, except that well-formed
6491 * documents need not declare any of the following entities: amp, lt,
6492 * gt, apos, quot. The declaration of a parameter entity must precede
6493 * any reference to it. Similarly, the declaration of a general entity
6494 * must precede any reference to it which appears in a default value in an
6495 * attribute-list declaration. Note that if entities are declared in the
6496 * external subset or in external parameter entities, a non-validating
6497 * processor is not obligated to read and process their declarations;
6498 * for such documents, the rule that an entity must be declared is a
6499 * well-formedness constraint only if standalone='yes'.
6500 *
6501 * [ WFC: Parsed Entity ]
6502 * An entity reference must not contain the name of an unparsed entity
6503 *
6504 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6505 * is updated to the current location in the string.
6506 */
6507xmlEntityPtr
6508xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6509 xmlChar *name;
6510 const xmlChar *ptr;
6511 xmlChar cur;
6512 xmlEntityPtr ent = NULL;
6513
6514 if ((str == NULL) || (*str == NULL))
6515 return(NULL);
6516 ptr = *str;
6517 cur = *ptr;
6518 if (cur == '&') {
6519 ptr++;
6520 cur = *ptr;
6521 name = xmlParseStringName(ctxt, &ptr);
6522 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006523 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6524 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006525 } else {
6526 if (*ptr == ';') {
6527 ptr++;
6528 /*
6529 * Ask first SAX for entity resolution, otherwise try the
6530 * predefined set.
6531 */
6532 if (ctxt->sax != NULL) {
6533 if (ctxt->sax->getEntity != NULL)
6534 ent = ctxt->sax->getEntity(ctxt->userData, name);
6535 if (ent == NULL)
6536 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006537 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006538 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006539 }
Owen Taylor3473f882001-02-23 17:55:21 +00006540 }
6541 /*
6542 * [ WFC: Entity Declared ]
6543 * In a document without any DTD, a document with only an
6544 * internal DTD subset which contains no parameter entity
6545 * references, or a document with "standalone='yes'", the
6546 * Name given in the entity reference must match that in an
6547 * entity declaration, except that well-formed documents
6548 * need not declare any of the following entities: amp, lt,
6549 * gt, apos, quot.
6550 * The declaration of a parameter entity must precede any
6551 * reference to it.
6552 * Similarly, the declaration of a general entity must
6553 * precede any reference to it which appears in a default
6554 * value in an attribute-list declaration. Note that if
6555 * entities are declared in the external subset or in
6556 * external parameter entities, a non-validating processor
6557 * is not obligated to read and process their declarations;
6558 * for such documents, the rule that an entity must be
6559 * declared is a well-formedness constraint only if
6560 * standalone='yes'.
6561 */
6562 if (ent == NULL) {
6563 if ((ctxt->standalone == 1) ||
6564 ((ctxt->hasExternalSubset == 0) &&
6565 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006566 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006567 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006569 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006570 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006571 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006572 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006573 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006574 }
6575
6576 /*
6577 * [ WFC: Parsed Entity ]
6578 * An entity reference must not contain the name of an
6579 * unparsed entity
6580 */
6581 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006582 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006583 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006584 }
6585
6586 /*
6587 * [ WFC: No External Entity References ]
6588 * Attribute values cannot contain direct or indirect
6589 * entity references to external entities.
6590 */
6591 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6592 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006593 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006594 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006595 }
6596 /*
6597 * [ WFC: No < in Attribute Values ]
6598 * The replacement text of any entity referred to directly or
6599 * indirectly in an attribute value (other than "&lt;") must
6600 * not contain a <.
6601 */
6602 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6603 (ent != NULL) &&
6604 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6605 (ent->content != NULL) &&
6606 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006607 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6608 "'<' in entity '%s' is not allowed in attributes values\n",
6609 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006610 }
6611
6612 /*
6613 * Internal check, no parameter entities here ...
6614 */
6615 else {
6616 switch (ent->etype) {
6617 case XML_INTERNAL_PARAMETER_ENTITY:
6618 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006619 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6620 "Attempt to reference the parameter entity '%s'\n",
6621 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006622 break;
6623 default:
6624 break;
6625 }
6626 }
6627
6628 /*
6629 * [ WFC: No Recursion ]
6630 * A parsed entity must not contain a recursive reference
6631 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006632 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006633 */
6634
6635 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006636 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006637 }
6638 xmlFree(name);
6639 }
6640 }
6641 *str = ptr;
6642 return(ent);
6643}
6644
6645/**
6646 * xmlParsePEReference:
6647 * @ctxt: an XML parser context
6648 *
6649 * parse PEReference declarations
6650 * The entity content is handled directly by pushing it's content as
6651 * a new input stream.
6652 *
6653 * [69] PEReference ::= '%' Name ';'
6654 *
6655 * [ WFC: No Recursion ]
6656 * A parsed entity must not contain a recursive
6657 * reference to itself, either directly or indirectly.
6658 *
6659 * [ WFC: Entity Declared ]
6660 * In a document without any DTD, a document with only an internal DTD
6661 * subset which contains no parameter entity references, or a document
6662 * with "standalone='yes'", ... ... The declaration of a parameter
6663 * entity must precede any reference to it...
6664 *
6665 * [ VC: Entity Declared ]
6666 * In a document with an external subset or external parameter entities
6667 * with "standalone='no'", ... ... The declaration of a parameter entity
6668 * must precede any reference to it...
6669 *
6670 * [ WFC: In DTD ]
6671 * Parameter-entity references may only appear in the DTD.
6672 * NOTE: misleading but this is handled.
6673 */
6674void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006675xmlParsePEReference(xmlParserCtxtPtr ctxt)
6676{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006677 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006678 xmlEntityPtr entity = NULL;
6679 xmlParserInputPtr input;
6680
6681 if (RAW == '%') {
6682 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006683 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006684 if (name == NULL) {
6685 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6686 "xmlParsePEReference: no name\n");
6687 } else {
6688 if (RAW == ';') {
6689 NEXT;
6690 if ((ctxt->sax != NULL) &&
6691 (ctxt->sax->getParameterEntity != NULL))
6692 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6693 name);
6694 if (entity == NULL) {
6695 /*
6696 * [ WFC: Entity Declared ]
6697 * In a document without any DTD, a document with only an
6698 * internal DTD subset which contains no parameter entity
6699 * references, or a document with "standalone='yes'", ...
6700 * ... The declaration of a parameter entity must precede
6701 * any reference to it...
6702 */
6703 if ((ctxt->standalone == 1) ||
6704 ((ctxt->hasExternalSubset == 0) &&
6705 (ctxt->hasPErefs == 0))) {
6706 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6707 "PEReference: %%%s; not found\n",
6708 name);
6709 } else {
6710 /*
6711 * [ VC: Entity Declared ]
6712 * In a document with an external subset or external
6713 * parameter entities with "standalone='no'", ...
6714 * ... The declaration of a parameter entity must
6715 * precede any reference to it...
6716 */
6717 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6718 "PEReference: %%%s; not found\n",
6719 name, NULL);
6720 ctxt->valid = 0;
6721 }
6722 } else {
6723 /*
6724 * Internal checking in case the entity quest barfed
6725 */
6726 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6727 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6728 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6729 "Internal: %%%s; is not a parameter entity\n",
6730 name, NULL);
6731 } else if (ctxt->input->free != deallocblankswrapper) {
6732 input =
6733 xmlNewBlanksWrapperInputStream(ctxt, entity);
6734 xmlPushInput(ctxt, input);
6735 } else {
6736 /*
6737 * TODO !!!
6738 * handle the extra spaces added before and after
6739 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6740 */
6741 input = xmlNewEntityInputStream(ctxt, entity);
6742 xmlPushInput(ctxt, input);
6743 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006744 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006745 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006746 xmlParseTextDecl(ctxt);
6747 if (ctxt->errNo ==
6748 XML_ERR_UNSUPPORTED_ENCODING) {
6749 /*
6750 * The XML REC instructs us to stop parsing
6751 * right here
6752 */
6753 ctxt->instate = XML_PARSER_EOF;
6754 return;
6755 }
6756 }
6757 }
6758 }
6759 ctxt->hasPErefs = 1;
6760 } else {
6761 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6762 }
6763 }
Owen Taylor3473f882001-02-23 17:55:21 +00006764 }
6765}
6766
6767/**
6768 * xmlParseStringPEReference:
6769 * @ctxt: an XML parser context
6770 * @str: a pointer to an index in the string
6771 *
6772 * parse PEReference declarations
6773 *
6774 * [69] PEReference ::= '%' Name ';'
6775 *
6776 * [ WFC: No Recursion ]
6777 * A parsed entity must not contain a recursive
6778 * reference to itself, either directly or indirectly.
6779 *
6780 * [ WFC: Entity Declared ]
6781 * In a document without any DTD, a document with only an internal DTD
6782 * subset which contains no parameter entity references, or a document
6783 * with "standalone='yes'", ... ... The declaration of a parameter
6784 * entity must precede any reference to it...
6785 *
6786 * [ VC: Entity Declared ]
6787 * In a document with an external subset or external parameter entities
6788 * with "standalone='no'", ... ... The declaration of a parameter entity
6789 * must precede any reference to it...
6790 *
6791 * [ WFC: In DTD ]
6792 * Parameter-entity references may only appear in the DTD.
6793 * NOTE: misleading but this is handled.
6794 *
6795 * Returns the string of the entity content.
6796 * str is updated to the current value of the index
6797 */
6798xmlEntityPtr
6799xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6800 const xmlChar *ptr;
6801 xmlChar cur;
6802 xmlChar *name;
6803 xmlEntityPtr entity = NULL;
6804
6805 if ((str == NULL) || (*str == NULL)) return(NULL);
6806 ptr = *str;
6807 cur = *ptr;
6808 if (cur == '%') {
6809 ptr++;
6810 cur = *ptr;
6811 name = xmlParseStringName(ctxt, &ptr);
6812 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006813 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6814 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006815 } else {
6816 cur = *ptr;
6817 if (cur == ';') {
6818 ptr++;
6819 cur = *ptr;
6820 if ((ctxt->sax != NULL) &&
6821 (ctxt->sax->getParameterEntity != NULL))
6822 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6823 name);
6824 if (entity == NULL) {
6825 /*
6826 * [ WFC: Entity Declared ]
6827 * In a document without any DTD, a document with only an
6828 * internal DTD subset which contains no parameter entity
6829 * references, or a document with "standalone='yes'", ...
6830 * ... The declaration of a parameter entity must precede
6831 * any reference to it...
6832 */
6833 if ((ctxt->standalone == 1) ||
6834 ((ctxt->hasExternalSubset == 0) &&
6835 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006836 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006837 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006838 } else {
6839 /*
6840 * [ VC: Entity Declared ]
6841 * In a document with an external subset or external
6842 * parameter entities with "standalone='no'", ...
6843 * ... The declaration of a parameter entity must
6844 * precede any reference to it...
6845 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006846 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6847 "PEReference: %%%s; not found\n",
6848 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006849 ctxt->valid = 0;
6850 }
6851 } else {
6852 /*
6853 * Internal checking in case the entity quest barfed
6854 */
6855 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6856 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006857 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6858 "%%%s; is not a parameter entity\n",
6859 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006860 }
6861 }
6862 ctxt->hasPErefs = 1;
6863 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006864 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006865 }
6866 xmlFree(name);
6867 }
6868 }
6869 *str = ptr;
6870 return(entity);
6871}
6872
6873/**
6874 * xmlParseDocTypeDecl:
6875 * @ctxt: an XML parser context
6876 *
6877 * parse a DOCTYPE declaration
6878 *
6879 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6880 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6881 *
6882 * [ VC: Root Element Type ]
6883 * The Name in the document type declaration must match the element
6884 * type of the root element.
6885 */
6886
6887void
6888xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006889 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006890 xmlChar *ExternalID = NULL;
6891 xmlChar *URI = NULL;
6892
6893 /*
6894 * We know that '<!DOCTYPE' has been detected.
6895 */
6896 SKIP(9);
6897
6898 SKIP_BLANKS;
6899
6900 /*
6901 * Parse the DOCTYPE name.
6902 */
6903 name = xmlParseName(ctxt);
6904 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006905 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6906 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006907 }
6908 ctxt->intSubName = name;
6909
6910 SKIP_BLANKS;
6911
6912 /*
6913 * Check for SystemID and ExternalID
6914 */
6915 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6916
6917 if ((URI != NULL) || (ExternalID != NULL)) {
6918 ctxt->hasExternalSubset = 1;
6919 }
6920 ctxt->extSubURI = URI;
6921 ctxt->extSubSystem = ExternalID;
6922
6923 SKIP_BLANKS;
6924
6925 /*
6926 * Create and update the internal subset.
6927 */
6928 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6929 (!ctxt->disableSAX))
6930 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6931
6932 /*
6933 * Is there any internal subset declarations ?
6934 * they are handled separately in xmlParseInternalSubset()
6935 */
6936 if (RAW == '[')
6937 return;
6938
6939 /*
6940 * We should be at the end of the DOCTYPE declaration.
6941 */
6942 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006943 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006944 }
6945 NEXT;
6946}
6947
6948/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006949 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006950 * @ctxt: an XML parser context
6951 *
6952 * parse the internal subset declaration
6953 *
6954 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6955 */
6956
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006957static void
Owen Taylor3473f882001-02-23 17:55:21 +00006958xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6959 /*
6960 * Is there any DTD definition ?
6961 */
6962 if (RAW == '[') {
6963 ctxt->instate = XML_PARSER_DTD;
6964 NEXT;
6965 /*
6966 * Parse the succession of Markup declarations and
6967 * PEReferences.
6968 * Subsequence (markupdecl | PEReference | S)*
6969 */
6970 while (RAW != ']') {
6971 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006972 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006973
6974 SKIP_BLANKS;
6975 xmlParseMarkupDecl(ctxt);
6976 xmlParsePEReference(ctxt);
6977
6978 /*
6979 * Pop-up of finished entities.
6980 */
6981 while ((RAW == 0) && (ctxt->inputNr > 1))
6982 xmlPopInput(ctxt);
6983
6984 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006985 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006986 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006987 break;
6988 }
6989 }
6990 if (RAW == ']') {
6991 NEXT;
6992 SKIP_BLANKS;
6993 }
6994 }
6995
6996 /*
6997 * We should be at the end of the DOCTYPE declaration.
6998 */
6999 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007000 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 }
7002 NEXT;
7003}
7004
Daniel Veillard81273902003-09-30 00:43:48 +00007005#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007006/**
7007 * xmlParseAttribute:
7008 * @ctxt: an XML parser context
7009 * @value: a xmlChar ** used to store the value of the attribute
7010 *
7011 * parse an attribute
7012 *
7013 * [41] Attribute ::= Name Eq AttValue
7014 *
7015 * [ WFC: No External Entity References ]
7016 * Attribute values cannot contain direct or indirect entity references
7017 * to external entities.
7018 *
7019 * [ WFC: No < in Attribute Values ]
7020 * The replacement text of any entity referred to directly or indirectly in
7021 * an attribute value (other than "&lt;") must not contain a <.
7022 *
7023 * [ VC: Attribute Value Type ]
7024 * The attribute must have been declared; the value must be of the type
7025 * declared for it.
7026 *
7027 * [25] Eq ::= S? '=' S?
7028 *
7029 * With namespace:
7030 *
7031 * [NS 11] Attribute ::= QName Eq AttValue
7032 *
7033 * Also the case QName == xmlns:??? is handled independently as a namespace
7034 * definition.
7035 *
7036 * Returns the attribute name, and the value in *value.
7037 */
7038
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007039const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007040xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007041 const xmlChar *name;
7042 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007043
7044 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007045 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007046 name = xmlParseName(ctxt);
7047 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007048 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007049 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007050 return(NULL);
7051 }
7052
7053 /*
7054 * read the value
7055 */
7056 SKIP_BLANKS;
7057 if (RAW == '=') {
7058 NEXT;
7059 SKIP_BLANKS;
7060 val = xmlParseAttValue(ctxt);
7061 ctxt->instate = XML_PARSER_CONTENT;
7062 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007063 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007064 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007065 return(NULL);
7066 }
7067
7068 /*
7069 * Check that xml:lang conforms to the specification
7070 * No more registered as an error, just generate a warning now
7071 * since this was deprecated in XML second edition
7072 */
7073 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7074 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007075 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7076 "Malformed value for xml:lang : %s\n",
7077 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007078 }
7079 }
7080
7081 /*
7082 * Check that xml:space conforms to the specification
7083 */
7084 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7085 if (xmlStrEqual(val, BAD_CAST "default"))
7086 *(ctxt->space) = 0;
7087 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7088 *(ctxt->space) = 1;
7089 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007090 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007091"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007092 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007093 }
7094 }
7095
7096 *value = val;
7097 return(name);
7098}
7099
7100/**
7101 * xmlParseStartTag:
7102 * @ctxt: an XML parser context
7103 *
7104 * parse a start of tag either for rule element or
7105 * EmptyElement. In both case we don't parse the tag closing chars.
7106 *
7107 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7108 *
7109 * [ WFC: Unique Att Spec ]
7110 * No attribute name may appear more than once in the same start-tag or
7111 * empty-element tag.
7112 *
7113 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7114 *
7115 * [ WFC: Unique Att Spec ]
7116 * No attribute name may appear more than once in the same start-tag or
7117 * empty-element tag.
7118 *
7119 * With namespace:
7120 *
7121 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7122 *
7123 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7124 *
7125 * Returns the element name parsed
7126 */
7127
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007128const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007129xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007130 const xmlChar *name;
7131 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007132 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007133 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007134 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007135 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007136 int i;
7137
7138 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007139 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007140
7141 name = xmlParseName(ctxt);
7142 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007143 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007144 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007145 return(NULL);
7146 }
7147
7148 /*
7149 * Now parse the attributes, it ends up with the ending
7150 *
7151 * (S Attribute)* S?
7152 */
7153 SKIP_BLANKS;
7154 GROW;
7155
Daniel Veillard21a0f912001-02-25 19:54:14 +00007156 while ((RAW != '>') &&
7157 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007158 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007159 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007160 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007161
7162 attname = xmlParseAttribute(ctxt, &attvalue);
7163 if ((attname != NULL) && (attvalue != NULL)) {
7164 /*
7165 * [ WFC: Unique Att Spec ]
7166 * No attribute name may appear more than once in the same
7167 * start-tag or empty-element tag.
7168 */
7169 for (i = 0; i < nbatts;i += 2) {
7170 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007171 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007172 xmlFree(attvalue);
7173 goto failed;
7174 }
7175 }
Owen Taylor3473f882001-02-23 17:55:21 +00007176 /*
7177 * Add the pair to atts
7178 */
7179 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007180 maxatts = 22; /* allow for 10 attrs by default */
7181 atts = (const xmlChar **)
7182 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007183 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007184 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007185 if (attvalue != NULL)
7186 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007187 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007188 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007189 ctxt->atts = atts;
7190 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007191 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 const xmlChar **n;
7193
Owen Taylor3473f882001-02-23 17:55:21 +00007194 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007195 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007196 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007197 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007198 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007199 if (attvalue != NULL)
7200 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007201 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007202 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007203 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007204 ctxt->atts = atts;
7205 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007206 }
7207 atts[nbatts++] = attname;
7208 atts[nbatts++] = attvalue;
7209 atts[nbatts] = NULL;
7210 atts[nbatts + 1] = NULL;
7211 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007212 if (attvalue != NULL)
7213 xmlFree(attvalue);
7214 }
7215
7216failed:
7217
Daniel Veillard3772de32002-12-17 10:31:45 +00007218 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007219 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7220 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007221 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007222 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7223 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007224 }
7225 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007226 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7227 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007228 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7229 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007230 break;
7231 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007232 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007233 GROW;
7234 }
7235
7236 /*
7237 * SAX: Start of Element !
7238 */
7239 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007240 (!ctxt->disableSAX)) {
7241 if (nbatts > 0)
7242 ctxt->sax->startElement(ctxt->userData, name, atts);
7243 else
7244 ctxt->sax->startElement(ctxt->userData, name, NULL);
7245 }
Owen Taylor3473f882001-02-23 17:55:21 +00007246
7247 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007248 /* Free only the content strings */
7249 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007250 if (atts[i] != NULL)
7251 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007252 }
7253 return(name);
7254}
7255
7256/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007257 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007258 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007259 * @line: line of the start tag
7260 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007261 *
7262 * parse an end of tag
7263 *
7264 * [42] ETag ::= '</' Name S? '>'
7265 *
7266 * With namespace
7267 *
7268 * [NS 9] ETag ::= '</' QName S? '>'
7269 */
7270
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007271static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007272xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007273 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007274
7275 GROW;
7276 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007277 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007278 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007279 return;
7280 }
7281 SKIP(2);
7282
Daniel Veillard46de64e2002-05-29 08:21:33 +00007283 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007284
7285 /*
7286 * We should definitely be at the ending "S? '>'" part
7287 */
7288 GROW;
7289 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007290 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007291 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007292 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007293 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007294
7295 /*
7296 * [ WFC: Element Type Match ]
7297 * The Name in an element's end-tag must match the element type in the
7298 * start-tag.
7299 *
7300 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007301 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007302 if (name == NULL) name = BAD_CAST "unparseable";
7303 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007304 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007305 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007306 }
7307
7308 /*
7309 * SAX: End of Tag
7310 */
7311 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7312 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007313 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007314
Daniel Veillarde57ec792003-09-10 10:50:59 +00007315 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007316 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007317 return;
7318}
7319
7320/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007321 * xmlParseEndTag:
7322 * @ctxt: an XML parser context
7323 *
7324 * parse an end of tag
7325 *
7326 * [42] ETag ::= '</' Name S? '>'
7327 *
7328 * With namespace
7329 *
7330 * [NS 9] ETag ::= '</' QName S? '>'
7331 */
7332
7333void
7334xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007335 xmlParseEndTag1(ctxt, 0);
7336}
Daniel Veillard81273902003-09-30 00:43:48 +00007337#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007338
/************************************************************************
 *									*
 *		SAX 2 specific operations				*
 *									*
 ************************************************************************/
7344
7345static const xmlChar *
7346xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7347 int len = 0, l;
7348 int c;
7349 int count = 0;
7350
7351 /*
7352 * Handler for more complex cases
7353 */
7354 GROW;
7355 c = CUR_CHAR(l);
7356 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007357 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007358 return(NULL);
7359 }
7360
7361 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007362 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007363 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007364 (IS_COMBINING(c)) ||
7365 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007366 if (count++ > 100) {
7367 count = 0;
7368 GROW;
7369 }
7370 len += l;
7371 NEXTL(l);
7372 c = CUR_CHAR(l);
7373 }
7374 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7375}
7376
7377/*
7378 * xmlGetNamespace:
7379 * @ctxt: an XML parser context
7380 * @prefix: the prefix to lookup
7381 *
7382 * Lookup the namespace name for the @prefix (which ca be NULL)
7383 * The prefix must come from the @ctxt->dict dictionnary
7384 *
7385 * Returns the namespace name or NULL if not bound
7386 */
7387static const xmlChar *
7388xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7389 int i;
7390
Daniel Veillarde57ec792003-09-10 10:50:59 +00007391 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007392 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007393 if (ctxt->nsTab[i] == prefix) {
7394 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7395 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007396 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007397 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 return(NULL);
7399}
7400
7401/**
7402 * xmlParseNCName:
7403 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007404 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007405 *
7406 * parse an XML name.
7407 *
7408 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7409 * CombiningChar | Extender
7410 *
7411 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7412 *
7413 * Returns the Name parsed or NULL
7414 */
7415
7416static const xmlChar *
7417xmlParseNCName(xmlParserCtxtPtr ctxt) {
7418 const xmlChar *in;
7419 const xmlChar *ret;
7420 int count = 0;
7421
7422 /*
7423 * Accelerator for simple ASCII names
7424 */
7425 in = ctxt->input->cur;
7426 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7427 ((*in >= 0x41) && (*in <= 0x5A)) ||
7428 (*in == '_')) {
7429 in++;
7430 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7431 ((*in >= 0x41) && (*in <= 0x5A)) ||
7432 ((*in >= 0x30) && (*in <= 0x39)) ||
7433 (*in == '_') || (*in == '-') ||
7434 (*in == '.'))
7435 in++;
7436 if ((*in > 0) && (*in < 0x80)) {
7437 count = in - ctxt->input->cur;
7438 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7439 ctxt->input->cur = in;
7440 ctxt->nbChars += count;
7441 ctxt->input->col += count;
7442 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007443 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007444 }
7445 return(ret);
7446 }
7447 }
7448 return(xmlParseNCNameComplex(ctxt));
7449}
7450
7451/**
7452 * xmlParseQName:
7453 * @ctxt: an XML parser context
7454 * @prefix: pointer to store the prefix part
7455 *
7456 * parse an XML Namespace QName
7457 *
7458 * [6] QName ::= (Prefix ':')? LocalPart
7459 * [7] Prefix ::= NCName
7460 * [8] LocalPart ::= NCName
7461 *
7462 * Returns the Name parsed or NULL
7463 */
7464
7465static const xmlChar *
7466xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7467 const xmlChar *l, *p;
7468
7469 GROW;
7470
7471 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007472 if (l == NULL) {
7473 if (CUR == ':') {
7474 l = xmlParseName(ctxt);
7475 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007476 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7477 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007478 *prefix = NULL;
7479 return(l);
7480 }
7481 }
7482 return(NULL);
7483 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007484 if (CUR == ':') {
7485 NEXT;
7486 p = l;
7487 l = xmlParseNCName(ctxt);
7488 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007489 xmlChar *tmp;
7490
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007491 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7492 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007493 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7494 p = xmlDictLookup(ctxt->dict, tmp, -1);
7495 if (tmp != NULL) xmlFree(tmp);
7496 *prefix = NULL;
7497 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007498 }
7499 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007500 xmlChar *tmp;
7501
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007502 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7503 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007504 NEXT;
7505 tmp = (xmlChar *) xmlParseName(ctxt);
7506 if (tmp != NULL) {
7507 tmp = xmlBuildQName(tmp, l, NULL, 0);
7508 l = xmlDictLookup(ctxt->dict, tmp, -1);
7509 if (tmp != NULL) xmlFree(tmp);
7510 *prefix = p;
7511 return(l);
7512 }
7513 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7514 l = xmlDictLookup(ctxt->dict, tmp, -1);
7515 if (tmp != NULL) xmlFree(tmp);
7516 *prefix = p;
7517 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 }
7519 *prefix = p;
7520 } else
7521 *prefix = NULL;
7522 return(l);
7523}
7524
7525/**
7526 * xmlParseQNameAndCompare:
7527 * @ctxt: an XML parser context
7528 * @name: the localname
7529 * @prefix: the prefix, if any.
7530 *
7531 * parse an XML name and compares for match
7532 * (specialized for endtag parsing)
7533 *
7534 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7535 * and the name for mismatch
7536 */
7537
7538static const xmlChar *
7539xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7540 xmlChar const *prefix) {
7541 const xmlChar *cmp = name;
7542 const xmlChar *in;
7543 const xmlChar *ret;
7544 const xmlChar *prefix2;
7545
7546 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7547
7548 GROW;
7549 in = ctxt->input->cur;
7550
7551 cmp = prefix;
7552 while (*in != 0 && *in == *cmp) {
7553 ++in;
7554 ++cmp;
7555 }
7556 if ((*cmp == 0) && (*in == ':')) {
7557 in++;
7558 cmp = name;
7559 while (*in != 0 && *in == *cmp) {
7560 ++in;
7561 ++cmp;
7562 }
William M. Brack76e95df2003-10-18 16:20:14 +00007563 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007564 /* success */
7565 ctxt->input->cur = in;
7566 return((const xmlChar*) 1);
7567 }
7568 }
7569 /*
7570 * all strings coms from the dictionary, equality can be done directly
7571 */
7572 ret = xmlParseQName (ctxt, &prefix2);
7573 if ((ret == name) && (prefix == prefix2))
7574 return((const xmlChar*) 1);
7575 return ret;
7576}
7577
7578/**
7579 * xmlParseAttValueInternal:
7580 * @ctxt: an XML parser context
7581 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007582 * @alloc: whether the attribute was reallocated as a new string
7583 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007584 *
7585 * parse a value for an attribute.
7586 * NOTE: if no normalization is needed, the routine will return pointers
7587 * directly from the data buffer.
7588 *
7589 * 3.3.3 Attribute-Value Normalization:
7590 * Before the value of an attribute is passed to the application or
7591 * checked for validity, the XML processor must normalize it as follows:
7592 * - a character reference is processed by appending the referenced
7593 * character to the attribute value
7594 * - an entity reference is processed by recursively processing the
7595 * replacement text of the entity
7596 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7597 * appending #x20 to the normalized value, except that only a single
7598 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7599 * parsed entity or the literal entity value of an internal parsed entity
7600 * - other characters are processed by appending them to the normalized value
7601 * If the declared value is not CDATA, then the XML processor must further
7602 * process the normalized attribute value by discarding any leading and
7603 * trailing space (#x20) characters, and by replacing sequences of space
7604 * (#x20) characters by a single space (#x20) character.
7605 * All attributes for which no declaration has been read should be treated
7606 * by a non-validating parser as if declared CDATA.
7607 *
7608 * Returns the AttValue parsed or NULL. The value has to be freed by the
7609 * caller if it was copied, this can be detected by val[*len] == 0.
7610 */
7611
7612static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007613xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7614 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007615{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007616 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007617 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007618 xmlChar *ret = NULL;
7619
7620 GROW;
7621 in = (xmlChar *) CUR_PTR;
7622 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007623 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624 return (NULL);
7625 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007626 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007627
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007628 /*
7629 * try to handle in this routine the most common case where no
7630 * allocation of a new string is required and where content is
7631 * pure ASCII.
7632 */
7633 limit = *in++;
7634 end = ctxt->input->end;
7635 start = in;
7636 if (in >= end) {
7637 const xmlChar *oldbase = ctxt->input->base;
7638 GROW;
7639 if (oldbase != ctxt->input->base) {
7640 long delta = ctxt->input->base - oldbase;
7641 start = start + delta;
7642 in = in + delta;
7643 }
7644 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007645 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007646 if (normalize) {
7647 /*
7648 * Skip any leading spaces
7649 */
7650 while ((in < end) && (*in != limit) &&
7651 ((*in == 0x20) || (*in == 0x9) ||
7652 (*in == 0xA) || (*in == 0xD))) {
7653 in++;
7654 start = in;
7655 if (in >= end) {
7656 const xmlChar *oldbase = ctxt->input->base;
7657 GROW;
7658 if (oldbase != ctxt->input->base) {
7659 long delta = ctxt->input->base - oldbase;
7660 start = start + delta;
7661 in = in + delta;
7662 }
7663 end = ctxt->input->end;
7664 }
7665 }
7666 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7667 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7668 if ((*in++ == 0x20) && (*in == 0x20)) break;
7669 if (in >= end) {
7670 const xmlChar *oldbase = ctxt->input->base;
7671 GROW;
7672 if (oldbase != ctxt->input->base) {
7673 long delta = ctxt->input->base - oldbase;
7674 start = start + delta;
7675 in = in + delta;
7676 }
7677 end = ctxt->input->end;
7678 }
7679 }
7680 last = in;
7681 /*
7682 * skip the trailing blanks
7683 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007684 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007685 while ((in < end) && (*in != limit) &&
7686 ((*in == 0x20) || (*in == 0x9) ||
7687 (*in == 0xA) || (*in == 0xD))) {
7688 in++;
7689 if (in >= end) {
7690 const xmlChar *oldbase = ctxt->input->base;
7691 GROW;
7692 if (oldbase != ctxt->input->base) {
7693 long delta = ctxt->input->base - oldbase;
7694 start = start + delta;
7695 in = in + delta;
7696 last = last + delta;
7697 }
7698 end = ctxt->input->end;
7699 }
7700 }
7701 if (*in != limit) goto need_complex;
7702 } else {
7703 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7704 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7705 in++;
7706 if (in >= end) {
7707 const xmlChar *oldbase = ctxt->input->base;
7708 GROW;
7709 if (oldbase != ctxt->input->base) {
7710 long delta = ctxt->input->base - oldbase;
7711 start = start + delta;
7712 in = in + delta;
7713 }
7714 end = ctxt->input->end;
7715 }
7716 }
7717 last = in;
7718 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007719 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007720 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007722 *len = last - start;
7723 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007724 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007725 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007726 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727 }
7728 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007729 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007730 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007731need_complex:
7732 if (alloc) *alloc = 1;
7733 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007734}
7735
7736/**
7737 * xmlParseAttribute2:
7738 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007739 * @pref: the element prefix
7740 * @elem: the element name
7741 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007742 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007743 * @len: an int * to save the length of the attribute
7744 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745 *
7746 * parse an attribute in the new SAX2 framework.
7747 *
7748 * Returns the attribute name, and the value in *value, .
7749 */
7750
7751static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007752xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7753 const xmlChar *pref, const xmlChar *elem,
7754 const xmlChar **prefix, xmlChar **value,
7755 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007757 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007758 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759
7760 *value = NULL;
7761 GROW;
7762 name = xmlParseQName(ctxt, prefix);
7763 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007764 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7765 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007766 return(NULL);
7767 }
7768
7769 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007770 * get the type if needed
7771 */
7772 if (ctxt->attsSpecial != NULL) {
7773 int type;
7774
7775 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7776 pref, elem, *prefix, name);
7777 if (type != 0) normalize = 1;
7778 }
7779
7780 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 * read the value
7782 */
7783 SKIP_BLANKS;
7784 if (RAW == '=') {
7785 NEXT;
7786 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007787 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007788 ctxt->instate = XML_PARSER_CONTENT;
7789 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007790 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007791 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007792 return(NULL);
7793 }
7794
Daniel Veillardd8925572005-06-08 22:34:55 +00007795 if (*prefix == ctxt->str_xml) {
7796 /*
7797 * Check that xml:lang conforms to the specification
7798 * No more registered as an error, just generate a warning now
7799 * since this was deprecated in XML second edition
7800 */
7801 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7802 internal_val = xmlStrndup(val, *len);
7803 if (!xmlCheckLanguageID(internal_val)) {
7804 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7805 "Malformed value for xml:lang : %s\n",
7806 internal_val, NULL);
7807 }
7808 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007809
Daniel Veillardd8925572005-06-08 22:34:55 +00007810 /*
7811 * Check that xml:space conforms to the specification
7812 */
7813 if (xmlStrEqual(name, BAD_CAST "space")) {
7814 internal_val = xmlStrndup(val, *len);
7815 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7816 *(ctxt->space) = 0;
7817 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7818 *(ctxt->space) = 1;
7819 else {
7820 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007822 internal_val, NULL);
7823 }
7824 }
7825 if (internal_val) {
7826 xmlFree(internal_val);
7827 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007828 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007829
7830 *value = val;
7831 return(name);
7832}
7833
7834/**
7835 * xmlParseStartTag2:
7836 * @ctxt: an XML parser context
7837 *
7838 * parse a start of tag either for rule element or
7839 * EmptyElement. In both case we don't parse the tag closing chars.
7840 * This routine is called when running SAX2 parsing
7841 *
7842 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7843 *
7844 * [ WFC: Unique Att Spec ]
7845 * No attribute name may appear more than once in the same start-tag or
7846 * empty-element tag.
7847 *
7848 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7849 *
7850 * [ WFC: Unique Att Spec ]
7851 * No attribute name may appear more than once in the same start-tag or
7852 * empty-element tag.
7853 *
7854 * With namespace:
7855 *
7856 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7857 *
7858 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7859 *
7860 * Returns the element name parsed
7861 */
7862
7863static const xmlChar *
7864xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007865 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866 const xmlChar *localname;
7867 const xmlChar *prefix;
7868 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007869 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007870 const xmlChar *nsname;
7871 xmlChar *attvalue;
7872 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007873 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007874 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007875 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007876 const xmlChar *base;
7877 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007878 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007879
7880 if (RAW != '<') return(NULL);
7881 NEXT1;
7882
7883 /*
7884 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7885 * point since the attribute values may be stored as pointers to
7886 * the buffer and calling SHRINK would destroy them !
7887 * The Shrinking is only possible once the full set of attribute
7888 * callbacks have been done.
7889 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007890reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007891 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007892 base = ctxt->input->base;
7893 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007894 oldline = ctxt->input->line;
7895 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007896 nbatts = 0;
7897 nratts = 0;
7898 nbdef = 0;
7899 nbNs = 0;
7900 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007901 /* Forget any namespaces added during an earlier parse of this element. */
7902 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007903
7904 localname = xmlParseQName(ctxt, &prefix);
7905 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007906 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7907 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908 return(NULL);
7909 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007910 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007911
7912 /*
7913 * Now parse the attributes, it ends up with the ending
7914 *
7915 * (S Attribute)* S?
7916 */
7917 SKIP_BLANKS;
7918 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007919 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007920
7921 while ((RAW != '>') &&
7922 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007923 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007924 const xmlChar *q = CUR_PTR;
7925 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007926 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007927
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007928 attname = xmlParseAttribute2(ctxt, prefix, localname,
7929 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007930 if (ctxt->input->base != base) {
7931 if ((attvalue != NULL) && (alloc != 0))
7932 xmlFree(attvalue);
7933 attvalue = NULL;
7934 goto base_changed;
7935 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 if ((attname != NULL) && (attvalue != NULL)) {
7937 if (len < 0) len = xmlStrlen(attvalue);
7938 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7940 xmlURIPtr uri;
7941
7942 if (*URL != 0) {
7943 uri = xmlParseURI((const char *) URL);
7944 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007945 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7946 "xmlns: %s not a valid URI\n",
7947 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007948 } else {
7949 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007950 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7951 "xmlns: URI %s is not absolute\n",
7952 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007953 }
7954 xmlFreeURI(uri);
7955 }
7956 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007957 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007958 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007959 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007960 for (j = 1;j <= nbNs;j++)
7961 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7962 break;
7963 if (j <= nbNs)
7964 xmlErrAttributeDup(ctxt, NULL, attname);
7965 else
7966 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007967 if (alloc != 0) xmlFree(attvalue);
7968 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007969 continue;
7970 }
7971 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007972 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7973 xmlURIPtr uri;
7974
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007975 if (attname == ctxt->str_xml) {
7976 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007977 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7978 "xml namespace prefix mapped to wrong URI\n",
7979 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007980 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007981 /*
7982 * Do not keep a namespace definition node
7983 */
7984 if (alloc != 0) xmlFree(attvalue);
7985 SKIP_BLANKS;
7986 continue;
7987 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007988 uri = xmlParseURI((const char *) URL);
7989 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007990 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7991 "xmlns:%s: '%s' is not a valid URI\n",
7992 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007993 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007994 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007995 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7996 "xmlns:%s: URI %s is not absolute\n",
7997 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007998 }
7999 xmlFreeURI(uri);
8000 }
8001
Daniel Veillard0fb18932003-09-07 09:14:37 +00008002 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008003 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008004 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008005 for (j = 1;j <= nbNs;j++)
8006 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8007 break;
8008 if (j <= nbNs)
8009 xmlErrAttributeDup(ctxt, aprefix, attname);
8010 else
8011 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008012 if (alloc != 0) xmlFree(attvalue);
8013 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008014 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008015 continue;
8016 }
8017
8018 /*
8019 * Add the pair to atts
8020 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008021 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8022 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008023 if (attvalue[len] == 0)
8024 xmlFree(attvalue);
8025 goto failed;
8026 }
8027 maxatts = ctxt->maxatts;
8028 atts = ctxt->atts;
8029 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008030 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008031 atts[nbatts++] = attname;
8032 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008033 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 atts[nbatts++] = attvalue;
8035 attvalue += len;
8036 atts[nbatts++] = attvalue;
8037 /*
8038 * tag if some deallocation is needed
8039 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008040 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008041 } else {
8042 if ((attvalue != NULL) && (attvalue[len] == 0))
8043 xmlFree(attvalue);
8044 }
8045
8046failed:
8047
8048 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008049 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008050 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8051 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008052 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008053 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8054 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008055 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008056 }
8057 SKIP_BLANKS;
8058 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8059 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008060 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008061 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008062 break;
8063 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008064 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008065 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008066 }
8067
Daniel Veillard0fb18932003-09-07 09:14:37 +00008068 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008069 * The attributes defaulting
8070 */
8071 if (ctxt->attsDefault != NULL) {
8072 xmlDefAttrsPtr defaults;
8073
8074 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8075 if (defaults != NULL) {
8076 for (i = 0;i < defaults->nbAttrs;i++) {
8077 attname = defaults->values[4 * i];
8078 aprefix = defaults->values[4 * i + 1];
8079
8080 /*
8081 * special work for namespaces defaulted defs
8082 */
8083 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8084 /*
8085 * check that it's not a defined namespace
8086 */
8087 for (j = 1;j <= nbNs;j++)
8088 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8089 break;
8090 if (j <= nbNs) continue;
8091
8092 nsname = xmlGetNamespace(ctxt, NULL);
8093 if (nsname != defaults->values[4 * i + 2]) {
8094 if (nsPush(ctxt, NULL,
8095 defaults->values[4 * i + 2]) > 0)
8096 nbNs++;
8097 }
8098 } else if (aprefix == ctxt->str_xmlns) {
8099 /*
8100 * check that it's not a defined namespace
8101 */
8102 for (j = 1;j <= nbNs;j++)
8103 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8104 break;
8105 if (j <= nbNs) continue;
8106
8107 nsname = xmlGetNamespace(ctxt, attname);
8108 if (nsname != defaults->values[2]) {
8109 if (nsPush(ctxt, attname,
8110 defaults->values[4 * i + 2]) > 0)
8111 nbNs++;
8112 }
8113 } else {
8114 /*
8115 * check that it's not a defined attribute
8116 */
8117 for (j = 0;j < nbatts;j+=5) {
8118 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8119 break;
8120 }
8121 if (j < nbatts) continue;
8122
8123 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8124 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008125 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008126 }
8127 maxatts = ctxt->maxatts;
8128 atts = ctxt->atts;
8129 }
8130 atts[nbatts++] = attname;
8131 atts[nbatts++] = aprefix;
8132 if (aprefix == NULL)
8133 atts[nbatts++] = NULL;
8134 else
8135 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8136 atts[nbatts++] = defaults->values[4 * i + 2];
8137 atts[nbatts++] = defaults->values[4 * i + 3];
8138 nbdef++;
8139 }
8140 }
8141 }
8142 }
8143
Daniel Veillarde70c8772003-11-25 07:21:18 +00008144 /*
8145 * The attributes checkings
8146 */
8147 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008148 /*
8149 * The default namespace does not apply to attribute names.
8150 */
8151 if (atts[i + 1] != NULL) {
8152 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8153 if (nsname == NULL) {
8154 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8155 "Namespace prefix %s for %s on %s is not defined\n",
8156 atts[i + 1], atts[i], localname);
8157 }
8158 atts[i + 2] = nsname;
8159 } else
8160 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008161 /*
8162 * [ WFC: Unique Att Spec ]
8163 * No attribute name may appear more than once in the same
8164 * start-tag or empty-element tag.
8165 * As extended by the Namespace in XML REC.
8166 */
8167 for (j = 0; j < i;j += 5) {
8168 if (atts[i] == atts[j]) {
8169 if (atts[i+1] == atts[j+1]) {
8170 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8171 break;
8172 }
8173 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8174 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8175 "Namespaced Attribute %s in '%s' redefined\n",
8176 atts[i], nsname, NULL);
8177 break;
8178 }
8179 }
8180 }
8181 }
8182
Daniel Veillarde57ec792003-09-10 10:50:59 +00008183 nsname = xmlGetNamespace(ctxt, prefix);
8184 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008185 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8186 "Namespace prefix %s on %s is not defined\n",
8187 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008188 }
8189 *pref = prefix;
8190 *URI = nsname;
8191
8192 /*
8193 * SAX: Start of Element !
8194 */
8195 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8196 (!ctxt->disableSAX)) {
8197 if (nbNs > 0)
8198 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8199 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8200 nbatts / 5, nbdef, atts);
8201 else
8202 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8203 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8204 }
8205
8206 /*
8207 * Free up attribute allocated strings if needed
8208 */
8209 if (attval != 0) {
8210 for (i = 3,j = 0; j < nratts;i += 5,j++)
8211 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8212 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008213 }
8214
8215 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008216
8217base_changed:
8218 /*
8219 * the attribute strings are valid iif the base didn't changed
8220 */
8221 if (attval != 0) {
8222 for (i = 3,j = 0; j < nratts;i += 5,j++)
8223 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8224 xmlFree((xmlChar *) atts[i]);
8225 }
8226 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008227 ctxt->input->line = oldline;
8228 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008229 if (ctxt->wellFormed == 1) {
8230 goto reparse;
8231 }
8232 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233}
8234
8235/**
8236 * xmlParseEndTag2:
8237 * @ctxt: an XML parser context
8238 * @line: line of the start tag
8239 * @nsNr: number of namespaces on the start tag
8240 *
8241 * parse an end of tag
8242 *
8243 * [42] ETag ::= '</' Name S? '>'
8244 *
8245 * With namespace
8246 *
8247 * [NS 9] ETag ::= '</' QName S? '>'
8248 */
8249
8250static void
8251xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008252 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008253 const xmlChar *name;
8254
8255 GROW;
8256 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008257 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008258 return;
8259 }
8260 SKIP(2);
8261
William M. Brack13dfa872004-09-18 04:52:08 +00008262 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008263 if (ctxt->input->cur[tlen] == '>') {
8264 ctxt->input->cur += tlen + 1;
8265 goto done;
8266 }
8267 ctxt->input->cur += tlen;
8268 name = (xmlChar*)1;
8269 } else {
8270 if (prefix == NULL)
8271 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8272 else
8273 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8274 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008275
8276 /*
8277 * We should definitely be at the ending "S? '>'" part
8278 */
8279 GROW;
8280 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008281 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008282 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 } else
8284 NEXT1;
8285
8286 /*
8287 * [ WFC: Element Type Match ]
8288 * The Name in an element's end-tag must match the element type in the
8289 * start-tag.
8290 *
8291 */
8292 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008293 if (name == NULL) name = BAD_CAST "unparseable";
8294 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008295 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008296 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008297 }
8298
8299 /*
8300 * SAX: End of Tag
8301 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008302done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008303 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8304 (!ctxt->disableSAX))
8305 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8306
Daniel Veillard0fb18932003-09-07 09:14:37 +00008307 spacePop(ctxt);
8308 if (nsNr != 0)
8309 nsPop(ctxt, nsNr);
8310 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008311}
8312
8313/**
Owen Taylor3473f882001-02-23 17:55:21 +00008314 * xmlParseCDSect:
8315 * @ctxt: an XML parser context
8316 *
8317 * Parse escaped pure raw content.
8318 *
8319 * [18] CDSect ::= CDStart CData CDEnd
8320 *
8321 * [19] CDStart ::= '<![CDATA['
8322 *
8323 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8324 *
8325 * [21] CDEnd ::= ']]>'
8326 */
8327void
8328xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8329 xmlChar *buf = NULL;
8330 int len = 0;
8331 int size = XML_PARSER_BUFFER_SIZE;
8332 int r, rl;
8333 int s, sl;
8334 int cur, l;
8335 int count = 0;
8336
Daniel Veillard8f597c32003-10-06 08:19:27 +00008337 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008338 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008339 SKIP(9);
8340 } else
8341 return;
8342
8343 ctxt->instate = XML_PARSER_CDATA_SECTION;
8344 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008345 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008346 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008347 ctxt->instate = XML_PARSER_CONTENT;
8348 return;
8349 }
8350 NEXTL(rl);
8351 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008352 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008353 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008354 ctxt->instate = XML_PARSER_CONTENT;
8355 return;
8356 }
8357 NEXTL(sl);
8358 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008359 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008360 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008361 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008362 return;
8363 }
William M. Brack871611b2003-10-18 04:53:14 +00008364 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008365 ((r != ']') || (s != ']') || (cur != '>'))) {
8366 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008367 xmlChar *tmp;
8368
Owen Taylor3473f882001-02-23 17:55:21 +00008369 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008370 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8371 if (tmp == NULL) {
8372 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008373 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008374 return;
8375 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008376 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008377 }
8378 COPY_BUF(rl,buf,len,r);
8379 r = s;
8380 rl = sl;
8381 s = cur;
8382 sl = l;
8383 count++;
8384 if (count > 50) {
8385 GROW;
8386 count = 0;
8387 }
8388 NEXTL(l);
8389 cur = CUR_CHAR(l);
8390 }
8391 buf[len] = 0;
8392 ctxt->instate = XML_PARSER_CONTENT;
8393 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008394 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008395 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008396 xmlFree(buf);
8397 return;
8398 }
8399 NEXTL(l);
8400
8401 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008402 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008403 */
8404 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8405 if (ctxt->sax->cdataBlock != NULL)
8406 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008407 else if (ctxt->sax->characters != NULL)
8408 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008409 }
8410 xmlFree(buf);
8411}
8412
8413/**
8414 * xmlParseContent:
8415 * @ctxt: an XML parser context
8416 *
8417 * Parse a content:
8418 *
8419 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8420 */
8421
8422void
8423xmlParseContent(xmlParserCtxtPtr ctxt) {
8424 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008425 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008426 ((RAW != '<') || (NXT(1) != '/')) &&
8427 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008428 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008429 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008430 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008431
8432 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008433 * First case : a Processing Instruction.
8434 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008435 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008436 xmlParsePI(ctxt);
8437 }
8438
8439 /*
8440 * Second case : a CDSection
8441 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008442 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008443 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008444 xmlParseCDSect(ctxt);
8445 }
8446
8447 /*
8448 * Third case : a comment
8449 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008450 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008451 (NXT(2) == '-') && (NXT(3) == '-')) {
8452 xmlParseComment(ctxt);
8453 ctxt->instate = XML_PARSER_CONTENT;
8454 }
8455
8456 /*
8457 * Fourth case : a sub-element.
8458 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008459 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008460 xmlParseElement(ctxt);
8461 }
8462
8463 /*
8464 * Fifth case : a reference. If if has not been resolved,
8465 * parsing returns it's Name, create the node
8466 */
8467
Daniel Veillard21a0f912001-02-25 19:54:14 +00008468 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008469 xmlParseReference(ctxt);
8470 }
8471
8472 /*
8473 * Last case, text. Note that References are handled directly.
8474 */
8475 else {
8476 xmlParseCharData(ctxt, 0);
8477 }
8478
8479 GROW;
8480 /*
8481 * Pop-up of finished entities.
8482 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008483 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008484 xmlPopInput(ctxt);
8485 SHRINK;
8486
Daniel Veillardfdc91562002-07-01 21:52:03 +00008487 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008488 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8489 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008490 ctxt->instate = XML_PARSER_EOF;
8491 break;
8492 }
8493 }
8494}
8495
8496/**
8497 * xmlParseElement:
8498 * @ctxt: an XML parser context
8499 *
8500 * parse an XML element, this is highly recursive
8501 *
8502 * [39] element ::= EmptyElemTag | STag content ETag
8503 *
8504 * [ WFC: Element Type Match ]
8505 * The Name in an element's end-tag must match the element type in the
8506 * start-tag.
8507 *
Owen Taylor3473f882001-02-23 17:55:21 +00008508 */
8509
8510void
8511xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008512 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008513 const xmlChar *prefix;
8514 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008515 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008516 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008517 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008518 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008519
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008520 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8521 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8522 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8523 xmlParserMaxDepth);
8524 ctxt->instate = XML_PARSER_EOF;
8525 return;
8526 }
8527
Owen Taylor3473f882001-02-23 17:55:21 +00008528 /* Capture start position */
8529 if (ctxt->record_info) {
8530 node_info.begin_pos = ctxt->input->consumed +
8531 (CUR_PTR - ctxt->input->base);
8532 node_info.begin_line = ctxt->input->line;
8533 }
8534
8535 if (ctxt->spaceNr == 0)
8536 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008537 else if (*ctxt->space == -2)
8538 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008539 else
8540 spacePush(ctxt, *ctxt->space);
8541
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008542 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008543#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008545#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008546 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008547#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008548 else
8549 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008550#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008551 if (name == NULL) {
8552 spacePop(ctxt);
8553 return;
8554 }
8555 namePush(ctxt, name);
8556 ret = ctxt->node;
8557
Daniel Veillard4432df22003-09-28 18:58:27 +00008558#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008559 /*
8560 * [ VC: Root Element Type ]
8561 * The Name in the document type declaration must match the element
8562 * type of the root element.
8563 */
8564 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8565 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8566 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008567#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008568
8569 /*
8570 * Check for an Empty Element.
8571 */
8572 if ((RAW == '/') && (NXT(1) == '>')) {
8573 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008574 if (ctxt->sax2) {
8575 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8576 (!ctxt->disableSAX))
8577 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008578#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579 } else {
8580 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8581 (!ctxt->disableSAX))
8582 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008583#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008584 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008585 namePop(ctxt);
8586 spacePop(ctxt);
8587 if (nsNr != ctxt->nsNr)
8588 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008589 if ( ret != NULL && ctxt->record_info ) {
8590 node_info.end_pos = ctxt->input->consumed +
8591 (CUR_PTR - ctxt->input->base);
8592 node_info.end_line = ctxt->input->line;
8593 node_info.node = ret;
8594 xmlParserAddNodeInfo(ctxt, &node_info);
8595 }
8596 return;
8597 }
8598 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008599 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008600 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008601 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8602 "Couldn't find end of Start Tag %s line %d\n",
8603 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008604
8605 /*
8606 * end of parsing of this node.
8607 */
8608 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008609 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008610 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008611 if (nsNr != ctxt->nsNr)
8612 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008613
8614 /*
8615 * Capture end position and add node
8616 */
8617 if ( ret != NULL && ctxt->record_info ) {
8618 node_info.end_pos = ctxt->input->consumed +
8619 (CUR_PTR - ctxt->input->base);
8620 node_info.end_line = ctxt->input->line;
8621 node_info.node = ret;
8622 xmlParserAddNodeInfo(ctxt, &node_info);
8623 }
8624 return;
8625 }
8626
8627 /*
8628 * Parse the content of the element:
8629 */
8630 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008631 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008632 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008633 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008634 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008635
8636 /*
8637 * end of parsing of this node.
8638 */
8639 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008640 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008641 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 if (nsNr != ctxt->nsNr)
8643 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008644 return;
8645 }
8646
8647 /*
8648 * parse the end of tag: '</' should be here.
8649 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008650 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008651 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008652 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008653 }
8654#ifdef LIBXML_SAX1_ENABLED
8655 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008656 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008657#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008658
8659 /*
8660 * Capture end position and add node
8661 */
8662 if ( ret != NULL && ctxt->record_info ) {
8663 node_info.end_pos = ctxt->input->consumed +
8664 (CUR_PTR - ctxt->input->base);
8665 node_info.end_line = ctxt->input->line;
8666 node_info.node = ret;
8667 xmlParserAddNodeInfo(ctxt, &node_info);
8668 }
8669}
8670
8671/**
8672 * xmlParseVersionNum:
8673 * @ctxt: an XML parser context
8674 *
8675 * parse the XML version value.
8676 *
8677 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8678 *
8679 * Returns the string giving the XML version number, or NULL
8680 */
8681xmlChar *
8682xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8683 xmlChar *buf = NULL;
8684 int len = 0;
8685 int size = 10;
8686 xmlChar cur;
8687
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008688 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008689 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008690 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008691 return(NULL);
8692 }
8693 cur = CUR;
8694 while (((cur >= 'a') && (cur <= 'z')) ||
8695 ((cur >= 'A') && (cur <= 'Z')) ||
8696 ((cur >= '0') && (cur <= '9')) ||
8697 (cur == '_') || (cur == '.') ||
8698 (cur == ':') || (cur == '-')) {
8699 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008700 xmlChar *tmp;
8701
Owen Taylor3473f882001-02-23 17:55:21 +00008702 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008703 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8704 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008705 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008706 return(NULL);
8707 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008708 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008709 }
8710 buf[len++] = cur;
8711 NEXT;
8712 cur=CUR;
8713 }
8714 buf[len] = 0;
8715 return(buf);
8716}
8717
8718/**
8719 * xmlParseVersionInfo:
8720 * @ctxt: an XML parser context
8721 *
8722 * parse the XML version.
8723 *
8724 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8725 *
8726 * [25] Eq ::= S? '=' S?
8727 *
8728 * Returns the version string, e.g. "1.0"
8729 */
8730
8731xmlChar *
8732xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8733 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008734
Daniel Veillarda07050d2003-10-19 14:46:32 +00008735 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008736 SKIP(7);
8737 SKIP_BLANKS;
8738 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008739 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008740 return(NULL);
8741 }
8742 NEXT;
8743 SKIP_BLANKS;
8744 if (RAW == '"') {
8745 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008746 version = xmlParseVersionNum(ctxt);
8747 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008748 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008749 } else
8750 NEXT;
8751 } else if (RAW == '\''){
8752 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008753 version = xmlParseVersionNum(ctxt);
8754 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008755 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008756 } else
8757 NEXT;
8758 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008759 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008760 }
8761 }
8762 return(version);
8763}
8764
8765/**
8766 * xmlParseEncName:
8767 * @ctxt: an XML parser context
8768 *
8769 * parse the XML encoding name
8770 *
8771 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8772 *
8773 * Returns the encoding name value or NULL
8774 */
8775xmlChar *
8776xmlParseEncName(xmlParserCtxtPtr ctxt) {
8777 xmlChar *buf = NULL;
8778 int len = 0;
8779 int size = 10;
8780 xmlChar cur;
8781
8782 cur = CUR;
8783 if (((cur >= 'a') && (cur <= 'z')) ||
8784 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008785 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008786 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008787 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008788 return(NULL);
8789 }
8790
8791 buf[len++] = cur;
8792 NEXT;
8793 cur = CUR;
8794 while (((cur >= 'a') && (cur <= 'z')) ||
8795 ((cur >= 'A') && (cur <= 'Z')) ||
8796 ((cur >= '0') && (cur <= '9')) ||
8797 (cur == '.') || (cur == '_') ||
8798 (cur == '-')) {
8799 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008800 xmlChar *tmp;
8801
Owen Taylor3473f882001-02-23 17:55:21 +00008802 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008803 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8804 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008805 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008806 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008807 return(NULL);
8808 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008809 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008810 }
8811 buf[len++] = cur;
8812 NEXT;
8813 cur = CUR;
8814 if (cur == 0) {
8815 SHRINK;
8816 GROW;
8817 cur = CUR;
8818 }
8819 }
8820 buf[len] = 0;
8821 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008822 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008823 }
8824 return(buf);
8825}
8826
8827/**
8828 * xmlParseEncodingDecl:
8829 * @ctxt: an XML parser context
8830 *
8831 * parse the XML encoding declaration
8832 *
8833 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8834 *
8835 * this setups the conversion filters.
8836 *
8837 * Returns the encoding value or NULL
8838 */
8839
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008840const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008841xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8842 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008843
8844 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008845 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008846 SKIP(8);
8847 SKIP_BLANKS;
8848 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008849 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008850 return(NULL);
8851 }
8852 NEXT;
8853 SKIP_BLANKS;
8854 if (RAW == '"') {
8855 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008856 encoding = xmlParseEncName(ctxt);
8857 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008858 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008859 } else
8860 NEXT;
8861 } else if (RAW == '\''){
8862 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008863 encoding = xmlParseEncName(ctxt);
8864 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008865 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008866 } else
8867 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008868 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008869 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008870 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008871 /*
8872 * UTF-16 encoding stwich has already taken place at this stage,
8873 * more over the little-endian/big-endian selection is already done
8874 */
8875 if ((encoding != NULL) &&
8876 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8877 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008878 if (ctxt->encoding != NULL)
8879 xmlFree((xmlChar *) ctxt->encoding);
8880 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008881 }
8882 /*
8883 * UTF-8 encoding is handled natively
8884 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008885 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008886 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8887 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008888 if (ctxt->encoding != NULL)
8889 xmlFree((xmlChar *) ctxt->encoding);
8890 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008891 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008892 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008893 xmlCharEncodingHandlerPtr handler;
8894
8895 if (ctxt->input->encoding != NULL)
8896 xmlFree((xmlChar *) ctxt->input->encoding);
8897 ctxt->input->encoding = encoding;
8898
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008899 handler = xmlFindCharEncodingHandler((const char *) encoding);
8900 if (handler != NULL) {
8901 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008902 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008903 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008904 "Unsupported encoding %s\n", encoding);
8905 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008906 }
8907 }
8908 }
8909 return(encoding);
8910}
8911
8912/**
8913 * xmlParseSDDecl:
8914 * @ctxt: an XML parser context
8915 *
8916 * parse the XML standalone declaration
8917 *
8918 * [32] SDDecl ::= S 'standalone' Eq
8919 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8920 *
8921 * [ VC: Standalone Document Declaration ]
8922 * TODO The standalone document declaration must have the value "no"
8923 * if any external markup declarations contain declarations of:
8924 * - attributes with default values, if elements to which these
8925 * attributes apply appear in the document without specifications
8926 * of values for these attributes, or
8927 * - entities (other than amp, lt, gt, apos, quot), if references
8928 * to those entities appear in the document, or
8929 * - attributes with values subject to normalization, where the
8930 * attribute appears in the document with a value which will change
8931 * as a result of normalization, or
8932 * - element types with element content, if white space occurs directly
8933 * within any instance of those types.
8934 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008935 * Returns:
8936 * 1 if standalone="yes"
8937 * 0 if standalone="no"
8938 * -2 if standalone attribute is missing or invalid
8939 * (A standalone value of -2 means that the XML declaration was found,
8940 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00008941 */
8942
8943int
8944xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008945 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00008946
8947 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008948 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008949 SKIP(10);
8950 SKIP_BLANKS;
8951 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008952 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008953 return(standalone);
8954 }
8955 NEXT;
8956 SKIP_BLANKS;
8957 if (RAW == '\''){
8958 NEXT;
8959 if ((RAW == 'n') && (NXT(1) == 'o')) {
8960 standalone = 0;
8961 SKIP(2);
8962 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8963 (NXT(2) == 's')) {
8964 standalone = 1;
8965 SKIP(3);
8966 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008967 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008968 }
8969 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008970 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008971 } else
8972 NEXT;
8973 } else if (RAW == '"'){
8974 NEXT;
8975 if ((RAW == 'n') && (NXT(1) == 'o')) {
8976 standalone = 0;
8977 SKIP(2);
8978 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8979 (NXT(2) == 's')) {
8980 standalone = 1;
8981 SKIP(3);
8982 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008983 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008984 }
8985 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008986 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008987 } else
8988 NEXT;
8989 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008990 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008991 }
8992 }
8993 return(standalone);
8994}
8995
8996/**
8997 * xmlParseXMLDecl:
8998 * @ctxt: an XML parser context
8999 *
9000 * parse an XML declaration header
9001 *
9002 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9003 */
9004
9005void
9006xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9007 xmlChar *version;
9008
9009 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009010 * This value for standalone indicates that the document has an
9011 * XML declaration but it does not have a standalone attribute.
9012 * It will be overwritten later if a standalone attribute is found.
9013 */
9014 ctxt->input->standalone = -2;
9015
9016 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009017 * We know that '<?xml' is here.
9018 */
9019 SKIP(5);
9020
William M. Brack76e95df2003-10-18 16:20:14 +00009021 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9023 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009024 }
9025 SKIP_BLANKS;
9026
9027 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009028 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009029 */
9030 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009031 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009032 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009033 } else {
9034 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9035 /*
9036 * TODO: Blueberry should be detected here
9037 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009038 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9039 "Unsupported version '%s'\n",
9040 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009041 }
9042 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009043 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009044 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009045 }
Owen Taylor3473f882001-02-23 17:55:21 +00009046
9047 /*
9048 * We may have the encoding declaration
9049 */
William M. Brack76e95df2003-10-18 16:20:14 +00009050 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009051 if ((RAW == '?') && (NXT(1) == '>')) {
9052 SKIP(2);
9053 return;
9054 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009055 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009056 }
9057 xmlParseEncodingDecl(ctxt);
9058 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9059 /*
9060 * The XML REC instructs us to stop parsing right here
9061 */
9062 return;
9063 }
9064
9065 /*
9066 * We may have the standalone status.
9067 */
William M. Brack76e95df2003-10-18 16:20:14 +00009068 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009069 if ((RAW == '?') && (NXT(1) == '>')) {
9070 SKIP(2);
9071 return;
9072 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009073 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009074 }
9075 SKIP_BLANKS;
9076 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9077
9078 SKIP_BLANKS;
9079 if ((RAW == '?') && (NXT(1) == '>')) {
9080 SKIP(2);
9081 } else if (RAW == '>') {
9082 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009083 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009084 NEXT;
9085 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009086 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009087 MOVETO_ENDTAG(CUR_PTR);
9088 NEXT;
9089 }
9090}
9091
9092/**
9093 * xmlParseMisc:
9094 * @ctxt: an XML parser context
9095 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009096 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009097 *
9098 * [27] Misc ::= Comment | PI | S
9099 */
9100
9101void
9102xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009103 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009104 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009105 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009106 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009107 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009108 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009109 NEXT;
9110 } else
9111 xmlParseComment(ctxt);
9112 }
9113}
9114
9115/**
9116 * xmlParseDocument:
9117 * @ctxt: an XML parser context
9118 *
9119 * parse an XML document (and build a tree if using the standard SAX
9120 * interface).
9121 *
9122 * [1] document ::= prolog element Misc*
9123 *
9124 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9125 *
9126 * Returns 0, -1 in case of error. the parser context is augmented
9127 * as a result of the parsing.
9128 */
9129
9130int
9131xmlParseDocument(xmlParserCtxtPtr ctxt) {
9132 xmlChar start[4];
9133 xmlCharEncoding enc;
9134
9135 xmlInitParser();
9136
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009137 if ((ctxt == NULL) || (ctxt->input == NULL))
9138 return(-1);
9139
Owen Taylor3473f882001-02-23 17:55:21 +00009140 GROW;
9141
9142 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009143 * SAX: detecting the level.
9144 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009145 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146
9147 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009148 * SAX: beginning of the document processing.
9149 */
9150 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9151 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9152
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009153 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9154 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009155 /*
9156 * Get the 4 first bytes and decode the charset
9157 * if enc != XML_CHAR_ENCODING_NONE
9158 * plug some encoding conversion routines.
9159 */
9160 start[0] = RAW;
9161 start[1] = NXT(1);
9162 start[2] = NXT(2);
9163 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009164 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009165 if (enc != XML_CHAR_ENCODING_NONE) {
9166 xmlSwitchEncoding(ctxt, enc);
9167 }
Owen Taylor3473f882001-02-23 17:55:21 +00009168 }
9169
9170
9171 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009172 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009173 }
9174
9175 /*
9176 * Check for the XMLDecl in the Prolog.
9177 */
9178 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009179 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009180
9181 /*
9182 * Note that we will switch encoding on the fly.
9183 */
9184 xmlParseXMLDecl(ctxt);
9185 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9186 /*
9187 * The XML REC instructs us to stop parsing right here
9188 */
9189 return(-1);
9190 }
9191 ctxt->standalone = ctxt->input->standalone;
9192 SKIP_BLANKS;
9193 } else {
9194 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9195 }
9196 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9197 ctxt->sax->startDocument(ctxt->userData);
9198
9199 /*
9200 * The Misc part of the Prolog
9201 */
9202 GROW;
9203 xmlParseMisc(ctxt);
9204
9205 /*
9206 * Then possibly doc type declaration(s) and more Misc
9207 * (doctypedecl Misc*)?
9208 */
9209 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009210 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009211
9212 ctxt->inSubset = 1;
9213 xmlParseDocTypeDecl(ctxt);
9214 if (RAW == '[') {
9215 ctxt->instate = XML_PARSER_DTD;
9216 xmlParseInternalSubset(ctxt);
9217 }
9218
9219 /*
9220 * Create and update the external subset.
9221 */
9222 ctxt->inSubset = 2;
9223 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9224 (!ctxt->disableSAX))
9225 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9226 ctxt->extSubSystem, ctxt->extSubURI);
9227 ctxt->inSubset = 0;
9228
9229
9230 ctxt->instate = XML_PARSER_PROLOG;
9231 xmlParseMisc(ctxt);
9232 }
9233
9234 /*
9235 * Time to start parsing the tree itself
9236 */
9237 GROW;
9238 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009239 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9240 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009241 } else {
9242 ctxt->instate = XML_PARSER_CONTENT;
9243 xmlParseElement(ctxt);
9244 ctxt->instate = XML_PARSER_EPILOG;
9245
9246
9247 /*
9248 * The Misc part at the end
9249 */
9250 xmlParseMisc(ctxt);
9251
Daniel Veillard561b7f82002-03-20 21:55:57 +00009252 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009253 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009254 }
9255 ctxt->instate = XML_PARSER_EOF;
9256 }
9257
9258 /*
9259 * SAX: end of the document processing.
9260 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009261 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009262 ctxt->sax->endDocument(ctxt->userData);
9263
Daniel Veillard5997aca2002-03-18 18:36:20 +00009264 /*
9265 * Remove locally kept entity definitions if the tree was not built
9266 */
9267 if ((ctxt->myDoc != NULL) &&
9268 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9269 xmlFreeDoc(ctxt->myDoc);
9270 ctxt->myDoc = NULL;
9271 }
9272
Daniel Veillardc7612992002-02-17 22:47:37 +00009273 if (! ctxt->wellFormed) {
9274 ctxt->valid = 0;
9275 return(-1);
9276 }
Owen Taylor3473f882001-02-23 17:55:21 +00009277 return(0);
9278}
9279
9280/**
9281 * xmlParseExtParsedEnt:
9282 * @ctxt: an XML parser context
9283 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009284 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009285 * An external general parsed entity is well-formed if it matches the
9286 * production labeled extParsedEnt.
9287 *
9288 * [78] extParsedEnt ::= TextDecl? content
9289 *
9290 * Returns 0, -1 in case of error. the parser context is augmented
9291 * as a result of the parsing.
9292 */
9293
9294int
9295xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9296 xmlChar start[4];
9297 xmlCharEncoding enc;
9298
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009299 if ((ctxt == NULL) || (ctxt->input == NULL))
9300 return(-1);
9301
Owen Taylor3473f882001-02-23 17:55:21 +00009302 xmlDefaultSAXHandlerInit();
9303
Daniel Veillard309f81d2003-09-23 09:02:53 +00009304 xmlDetectSAX2(ctxt);
9305
Owen Taylor3473f882001-02-23 17:55:21 +00009306 GROW;
9307
9308 /*
9309 * SAX: beginning of the document processing.
9310 */
9311 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9312 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9313
9314 /*
9315 * Get the 4 first bytes and decode the charset
9316 * if enc != XML_CHAR_ENCODING_NONE
9317 * plug some encoding conversion routines.
9318 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009319 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9320 start[0] = RAW;
9321 start[1] = NXT(1);
9322 start[2] = NXT(2);
9323 start[3] = NXT(3);
9324 enc = xmlDetectCharEncoding(start, 4);
9325 if (enc != XML_CHAR_ENCODING_NONE) {
9326 xmlSwitchEncoding(ctxt, enc);
9327 }
Owen Taylor3473f882001-02-23 17:55:21 +00009328 }
9329
9330
9331 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009332 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009333 }
9334
9335 /*
9336 * Check for the XMLDecl in the Prolog.
9337 */
9338 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009339 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009340
9341 /*
9342 * Note that we will switch encoding on the fly.
9343 */
9344 xmlParseXMLDecl(ctxt);
9345 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9346 /*
9347 * The XML REC instructs us to stop parsing right here
9348 */
9349 return(-1);
9350 }
9351 SKIP_BLANKS;
9352 } else {
9353 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9354 }
9355 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9356 ctxt->sax->startDocument(ctxt->userData);
9357
9358 /*
9359 * Doing validity checking on chunk doesn't make sense
9360 */
9361 ctxt->instate = XML_PARSER_CONTENT;
9362 ctxt->validate = 0;
9363 ctxt->loadsubset = 0;
9364 ctxt->depth = 0;
9365
9366 xmlParseContent(ctxt);
9367
9368 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009369 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009370 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009371 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009372 }
9373
9374 /*
9375 * SAX: end of the document processing.
9376 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009377 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009378 ctxt->sax->endDocument(ctxt->userData);
9379
9380 if (! ctxt->wellFormed) return(-1);
9381 return(0);
9382}
9383
Daniel Veillard73b013f2003-09-30 12:36:01 +00009384#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009385/************************************************************************
9386 * *
9387 * Progressive parsing interfaces *
9388 * *
9389 ************************************************************************/
9390
9391/**
9392 * xmlParseLookupSequence:
9393 * @ctxt: an XML parser context
9394 * @first: the first char to lookup
9395 * @next: the next char to lookup or zero
9396 * @third: the next char to lookup or zero
9397 *
9398 * Try to find if a sequence (first, next, third) or just (first next) or
9399 * (first) is available in the input stream.
9400 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9401 * to avoid rescanning sequences of bytes, it DOES change the state of the
9402 * parser, do not use liberally.
9403 *
9404 * Returns the index to the current parsing point if the full sequence
9405 * is available, -1 otherwise.
9406 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009407static int
Owen Taylor3473f882001-02-23 17:55:21 +00009408xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9409 xmlChar next, xmlChar third) {
9410 int base, len;
9411 xmlParserInputPtr in;
9412 const xmlChar *buf;
9413
9414 in = ctxt->input;
9415 if (in == NULL) return(-1);
9416 base = in->cur - in->base;
9417 if (base < 0) return(-1);
9418 if (ctxt->checkIndex > base)
9419 base = ctxt->checkIndex;
9420 if (in->buf == NULL) {
9421 buf = in->base;
9422 len = in->length;
9423 } else {
9424 buf = in->buf->buffer->content;
9425 len = in->buf->buffer->use;
9426 }
9427 /* take into account the sequence length */
9428 if (third) len -= 2;
9429 else if (next) len --;
9430 for (;base < len;base++) {
9431 if (buf[base] == first) {
9432 if (third != 0) {
9433 if ((buf[base + 1] != next) ||
9434 (buf[base + 2] != third)) continue;
9435 } else if (next != 0) {
9436 if (buf[base + 1] != next) continue;
9437 }
9438 ctxt->checkIndex = 0;
9439#ifdef DEBUG_PUSH
9440 if (next == 0)
9441 xmlGenericError(xmlGenericErrorContext,
9442 "PP: lookup '%c' found at %d\n",
9443 first, base);
9444 else if (third == 0)
9445 xmlGenericError(xmlGenericErrorContext,
9446 "PP: lookup '%c%c' found at %d\n",
9447 first, next, base);
9448 else
9449 xmlGenericError(xmlGenericErrorContext,
9450 "PP: lookup '%c%c%c' found at %d\n",
9451 first, next, third, base);
9452#endif
9453 return(base - (in->cur - in->base));
9454 }
9455 }
9456 ctxt->checkIndex = base;
9457#ifdef DEBUG_PUSH
9458 if (next == 0)
9459 xmlGenericError(xmlGenericErrorContext,
9460 "PP: lookup '%c' failed\n", first);
9461 else if (third == 0)
9462 xmlGenericError(xmlGenericErrorContext,
9463 "PP: lookup '%c%c' failed\n", first, next);
9464 else
9465 xmlGenericError(xmlGenericErrorContext,
9466 "PP: lookup '%c%c%c' failed\n", first, next, third);
9467#endif
9468 return(-1);
9469}
9470
9471/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 * xmlParseGetLasts:
9473 * @ctxt: an XML parser context
9474 * @lastlt: pointer to store the last '<' from the input
9475 * @lastgt: pointer to store the last '>' from the input
9476 *
9477 * Lookup the last < and > in the current chunk
9478 */
9479static void
9480xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9481 const xmlChar **lastgt) {
9482 const xmlChar *tmp;
9483
9484 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9485 xmlGenericError(xmlGenericErrorContext,
9486 "Internal error: xmlParseGetLasts\n");
9487 return;
9488 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009489 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009490 tmp = ctxt->input->end;
9491 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009492 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009493 if (tmp < ctxt->input->base) {
9494 *lastlt = NULL;
9495 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009496 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009497 *lastlt = tmp;
9498 tmp++;
9499 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9500 if (*tmp == '\'') {
9501 tmp++;
9502 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9503 if (tmp < ctxt->input->end) tmp++;
9504 } else if (*tmp == '"') {
9505 tmp++;
9506 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9507 if (tmp < ctxt->input->end) tmp++;
9508 } else
9509 tmp++;
9510 }
9511 if (tmp < ctxt->input->end)
9512 *lastgt = tmp;
9513 else {
9514 tmp = *lastlt;
9515 tmp--;
9516 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9517 if (tmp >= ctxt->input->base)
9518 *lastgt = tmp;
9519 else
9520 *lastgt = NULL;
9521 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009522 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009523 } else {
9524 *lastlt = NULL;
9525 *lastgt = NULL;
9526 }
9527}
9528/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009529 * xmlCheckCdataPush:
9530 * @cur: pointer to the bock of characters
9531 * @len: length of the block in bytes
9532 *
9533 * Check that the block of characters is okay as SCdata content [20]
9534 *
9535 * Returns the number of bytes to pass if okay, a negative index where an
9536 * UTF-8 error occured otherwise
9537 */
9538static int
9539xmlCheckCdataPush(const xmlChar *utf, int len) {
9540 int ix;
9541 unsigned char c;
9542 int codepoint;
9543
9544 if ((utf == NULL) || (len <= 0))
9545 return(0);
9546
9547 for (ix = 0; ix < len;) { /* string is 0-terminated */
9548 c = utf[ix];
9549 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9550 if (c >= 0x20)
9551 ix++;
9552 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9553 ix++;
9554 else
9555 return(-ix);
9556 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9557 if (ix + 2 > len) return(ix);
9558 if ((utf[ix+1] & 0xc0 ) != 0x80)
9559 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009560 codepoint = (utf[ix] & 0x1f) << 6;
9561 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009562 if (!xmlIsCharQ(codepoint))
9563 return(-ix);
9564 ix += 2;
9565 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9566 if (ix + 3 > len) return(ix);
9567 if (((utf[ix+1] & 0xc0) != 0x80) ||
9568 ((utf[ix+2] & 0xc0) != 0x80))
9569 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009570 codepoint = (utf[ix] & 0xf) << 12;
9571 codepoint |= (utf[ix+1] & 0x3f) << 6;
9572 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009573 if (!xmlIsCharQ(codepoint))
9574 return(-ix);
9575 ix += 3;
9576 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9577 if (ix + 4 > len) return(ix);
9578 if (((utf[ix+1] & 0xc0) != 0x80) ||
9579 ((utf[ix+2] & 0xc0) != 0x80) ||
9580 ((utf[ix+3] & 0xc0) != 0x80))
9581 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009582 codepoint = (utf[ix] & 0x7) << 18;
9583 codepoint |= (utf[ix+1] & 0x3f) << 12;
9584 codepoint |= (utf[ix+2] & 0x3f) << 6;
9585 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009586 if (!xmlIsCharQ(codepoint))
9587 return(-ix);
9588 ix += 4;
9589 } else /* unknown encoding */
9590 return(-ix);
9591 }
9592 return(ix);
9593}
9594
9595/**
Owen Taylor3473f882001-02-23 17:55:21 +00009596 * xmlParseTryOrFinish:
9597 * @ctxt: an XML parser context
9598 * @terminate: last chunk indicator
9599 *
9600 * Try to progress on parsing
9601 *
9602 * Returns zero if no parsing was possible
9603 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009604static int
Owen Taylor3473f882001-02-23 17:55:21 +00009605xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9606 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009607 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009608 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009609 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009610
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009611 if (ctxt->input == NULL)
9612 return(0);
9613
Owen Taylor3473f882001-02-23 17:55:21 +00009614#ifdef DEBUG_PUSH
9615 switch (ctxt->instate) {
9616 case XML_PARSER_EOF:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: try EOF\n"); break;
9619 case XML_PARSER_START:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: try START\n"); break;
9622 case XML_PARSER_MISC:
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: try MISC\n");break;
9625 case XML_PARSER_COMMENT:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: try COMMENT\n");break;
9628 case XML_PARSER_PROLOG:
9629 xmlGenericError(xmlGenericErrorContext,
9630 "PP: try PROLOG\n");break;
9631 case XML_PARSER_START_TAG:
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: try START_TAG\n");break;
9634 case XML_PARSER_CONTENT:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: try CONTENT\n");break;
9637 case XML_PARSER_CDATA_SECTION:
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: try CDATA_SECTION\n");break;
9640 case XML_PARSER_END_TAG:
9641 xmlGenericError(xmlGenericErrorContext,
9642 "PP: try END_TAG\n");break;
9643 case XML_PARSER_ENTITY_DECL:
9644 xmlGenericError(xmlGenericErrorContext,
9645 "PP: try ENTITY_DECL\n");break;
9646 case XML_PARSER_ENTITY_VALUE:
9647 xmlGenericError(xmlGenericErrorContext,
9648 "PP: try ENTITY_VALUE\n");break;
9649 case XML_PARSER_ATTRIBUTE_VALUE:
9650 xmlGenericError(xmlGenericErrorContext,
9651 "PP: try ATTRIBUTE_VALUE\n");break;
9652 case XML_PARSER_DTD:
9653 xmlGenericError(xmlGenericErrorContext,
9654 "PP: try DTD\n");break;
9655 case XML_PARSER_EPILOG:
9656 xmlGenericError(xmlGenericErrorContext,
9657 "PP: try EPILOG\n");break;
9658 case XML_PARSER_PI:
9659 xmlGenericError(xmlGenericErrorContext,
9660 "PP: try PI\n");break;
9661 case XML_PARSER_IGNORE:
9662 xmlGenericError(xmlGenericErrorContext,
9663 "PP: try IGNORE\n");break;
9664 }
9665#endif
9666
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009667 if ((ctxt->input != NULL) &&
9668 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009669 xmlSHRINK(ctxt);
9670 ctxt->checkIndex = 0;
9671 }
9672 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009673
Daniel Veillarda880b122003-04-21 21:36:41 +00009674 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009675 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009676 return(0);
9677
9678
Owen Taylor3473f882001-02-23 17:55:21 +00009679 /*
9680 * Pop-up of finished entities.
9681 */
9682 while ((RAW == 0) && (ctxt->inputNr > 1))
9683 xmlPopInput(ctxt);
9684
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009685 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009686 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009687 avail = ctxt->input->length -
9688 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009689 else {
9690 /*
9691 * If we are operating on converted input, try to flush
9692 * remainng chars to avoid them stalling in the non-converted
9693 * buffer.
9694 */
9695 if ((ctxt->input->buf->raw != NULL) &&
9696 (ctxt->input->buf->raw->use > 0)) {
9697 int base = ctxt->input->base -
9698 ctxt->input->buf->buffer->content;
9699 int current = ctxt->input->cur - ctxt->input->base;
9700
9701 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9702 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9703 ctxt->input->cur = ctxt->input->base + current;
9704 ctxt->input->end =
9705 &ctxt->input->buf->buffer->content[
9706 ctxt->input->buf->buffer->use];
9707 }
9708 avail = ctxt->input->buf->buffer->use -
9709 (ctxt->input->cur - ctxt->input->base);
9710 }
Owen Taylor3473f882001-02-23 17:55:21 +00009711 if (avail < 1)
9712 goto done;
9713 switch (ctxt->instate) {
9714 case XML_PARSER_EOF:
9715 /*
9716 * Document parsing is done !
9717 */
9718 goto done;
9719 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009720 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9721 xmlChar start[4];
9722 xmlCharEncoding enc;
9723
9724 /*
9725 * Very first chars read from the document flow.
9726 */
9727 if (avail < 4)
9728 goto done;
9729
9730 /*
9731 * Get the 4 first bytes and decode the charset
9732 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009733 * plug some encoding conversion routines,
9734 * else xmlSwitchEncoding will set to (default)
9735 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009736 */
9737 start[0] = RAW;
9738 start[1] = NXT(1);
9739 start[2] = NXT(2);
9740 start[3] = NXT(3);
9741 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009742 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009743 break;
9744 }
Owen Taylor3473f882001-02-23 17:55:21 +00009745
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009746 if (avail < 2)
9747 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009748 cur = ctxt->input->cur[0];
9749 next = ctxt->input->cur[1];
9750 if (cur == 0) {
9751 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9752 ctxt->sax->setDocumentLocator(ctxt->userData,
9753 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009754 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009755 ctxt->instate = XML_PARSER_EOF;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: entering EOF\n");
9759#endif
9760 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9761 ctxt->sax->endDocument(ctxt->userData);
9762 goto done;
9763 }
9764 if ((cur == '<') && (next == '?')) {
9765 /* PI or XML decl */
9766 if (avail < 5) return(ret);
9767 if ((!terminate) &&
9768 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9769 return(ret);
9770 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9771 ctxt->sax->setDocumentLocator(ctxt->userData,
9772 &xmlDefaultSAXLocator);
9773 if ((ctxt->input->cur[2] == 'x') &&
9774 (ctxt->input->cur[3] == 'm') &&
9775 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009776 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009777 ret += 5;
9778#ifdef DEBUG_PUSH
9779 xmlGenericError(xmlGenericErrorContext,
9780 "PP: Parsing XML Decl\n");
9781#endif
9782 xmlParseXMLDecl(ctxt);
9783 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9784 /*
9785 * The XML REC instructs us to stop parsing right
9786 * here
9787 */
9788 ctxt->instate = XML_PARSER_EOF;
9789 return(0);
9790 }
9791 ctxt->standalone = ctxt->input->standalone;
9792 if ((ctxt->encoding == NULL) &&
9793 (ctxt->input->encoding != NULL))
9794 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9795 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9796 (!ctxt->disableSAX))
9797 ctxt->sax->startDocument(ctxt->userData);
9798 ctxt->instate = XML_PARSER_MISC;
9799#ifdef DEBUG_PUSH
9800 xmlGenericError(xmlGenericErrorContext,
9801 "PP: entering MISC\n");
9802#endif
9803 } else {
9804 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9805 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9806 (!ctxt->disableSAX))
9807 ctxt->sax->startDocument(ctxt->userData);
9808 ctxt->instate = XML_PARSER_MISC;
9809#ifdef DEBUG_PUSH
9810 xmlGenericError(xmlGenericErrorContext,
9811 "PP: entering MISC\n");
9812#endif
9813 }
9814 } else {
9815 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9816 ctxt->sax->setDocumentLocator(ctxt->userData,
9817 &xmlDefaultSAXLocator);
9818 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009819 if (ctxt->version == NULL) {
9820 xmlErrMemory(ctxt, NULL);
9821 break;
9822 }
Owen Taylor3473f882001-02-23 17:55:21 +00009823 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9824 (!ctxt->disableSAX))
9825 ctxt->sax->startDocument(ctxt->userData);
9826 ctxt->instate = XML_PARSER_MISC;
9827#ifdef DEBUG_PUSH
9828 xmlGenericError(xmlGenericErrorContext,
9829 "PP: entering MISC\n");
9830#endif
9831 }
9832 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009833 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009834 const xmlChar *name;
9835 const xmlChar *prefix;
9836 const xmlChar *URI;
9837 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009838
9839 if ((avail < 2) && (ctxt->inputNr == 1))
9840 goto done;
9841 cur = ctxt->input->cur[0];
9842 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009843 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009844 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009845 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9846 ctxt->sax->endDocument(ctxt->userData);
9847 goto done;
9848 }
9849 if (!terminate) {
9850 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009851 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009852 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009853 goto done;
9854 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9855 goto done;
9856 }
9857 }
9858 if (ctxt->spaceNr == 0)
9859 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009860 else if (*ctxt->space == -2)
9861 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009862 else
9863 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009864#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009865 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009866#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009867 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009868#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009869 else
9870 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009871#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009872 if (name == NULL) {
9873 spacePop(ctxt);
9874 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9876 ctxt->sax->endDocument(ctxt->userData);
9877 goto done;
9878 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009879#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009880 /*
9881 * [ VC: Root Element Type ]
9882 * The Name in the document type declaration must match
9883 * the element type of the root element.
9884 */
9885 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9886 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9887 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009888#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009889
9890 /*
9891 * Check for an Empty Element.
9892 */
9893 if ((RAW == '/') && (NXT(1) == '>')) {
9894 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009895
9896 if (ctxt->sax2) {
9897 if ((ctxt->sax != NULL) &&
9898 (ctxt->sax->endElementNs != NULL) &&
9899 (!ctxt->disableSAX))
9900 ctxt->sax->endElementNs(ctxt->userData, name,
9901 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009902 if (ctxt->nsNr - nsNr > 0)
9903 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009904#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009905 } else {
9906 if ((ctxt->sax != NULL) &&
9907 (ctxt->sax->endElement != NULL) &&
9908 (!ctxt->disableSAX))
9909 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009910#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009911 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009912 spacePop(ctxt);
9913 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009914 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009915 } else {
9916 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009917 }
9918 break;
9919 }
9920 if (RAW == '>') {
9921 NEXT;
9922 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009923 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009924 "Couldn't find end of Start Tag %s\n",
9925 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009926 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009927 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009928 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009929 if (ctxt->sax2)
9930 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009931#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009932 else
9933 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009934#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009935
Daniel Veillarda880b122003-04-21 21:36:41 +00009936 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009937 break;
9938 }
9939 case XML_PARSER_CONTENT: {
9940 const xmlChar *test;
9941 unsigned int cons;
9942 if ((avail < 2) && (ctxt->inputNr == 1))
9943 goto done;
9944 cur = ctxt->input->cur[0];
9945 next = ctxt->input->cur[1];
9946
9947 test = CUR_PTR;
9948 cons = ctxt->input->consumed;
9949 if ((cur == '<') && (next == '/')) {
9950 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009951 break;
9952 } else if ((cur == '<') && (next == '?')) {
9953 if ((!terminate) &&
9954 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9955 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009956 xmlParsePI(ctxt);
9957 } else if ((cur == '<') && (next != '!')) {
9958 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009959 break;
9960 } else if ((cur == '<') && (next == '!') &&
9961 (ctxt->input->cur[2] == '-') &&
9962 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009963 int term;
9964
9965 if (avail < 4)
9966 goto done;
9967 ctxt->input->cur += 4;
9968 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9969 ctxt->input->cur -= 4;
9970 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009971 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009972 xmlParseComment(ctxt);
9973 ctxt->instate = XML_PARSER_CONTENT;
9974 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9975 (ctxt->input->cur[2] == '[') &&
9976 (ctxt->input->cur[3] == 'C') &&
9977 (ctxt->input->cur[4] == 'D') &&
9978 (ctxt->input->cur[5] == 'A') &&
9979 (ctxt->input->cur[6] == 'T') &&
9980 (ctxt->input->cur[7] == 'A') &&
9981 (ctxt->input->cur[8] == '[')) {
9982 SKIP(9);
9983 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009984 break;
9985 } else if ((cur == '<') && (next == '!') &&
9986 (avail < 9)) {
9987 goto done;
9988 } else if (cur == '&') {
9989 if ((!terminate) &&
9990 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9991 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009992 xmlParseReference(ctxt);
9993 } else {
9994 /* TODO Avoid the extra copy, handle directly !!! */
9995 /*
9996 * Goal of the following test is:
9997 * - minimize calls to the SAX 'character' callback
9998 * when they are mergeable
9999 * - handle an problem for isBlank when we only parse
10000 * a sequence of blank chars and the next one is
10001 * not available to check against '<' presence.
10002 * - tries to homogenize the differences in SAX
10003 * callbacks between the push and pull versions
10004 * of the parser.
10005 */
10006 if ((ctxt->inputNr == 1) &&
10007 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10008 if (!terminate) {
10009 if (ctxt->progressive) {
10010 if ((lastlt == NULL) ||
10011 (ctxt->input->cur > lastlt))
10012 goto done;
10013 } else if (xmlParseLookupSequence(ctxt,
10014 '<', 0, 0) < 0) {
10015 goto done;
10016 }
10017 }
10018 }
10019 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010020 xmlParseCharData(ctxt, 0);
10021 }
10022 /*
10023 * Pop-up of finished entities.
10024 */
10025 while ((RAW == 0) && (ctxt->inputNr > 1))
10026 xmlPopInput(ctxt);
10027 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010028 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10029 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010030 ctxt->instate = XML_PARSER_EOF;
10031 break;
10032 }
10033 break;
10034 }
10035 case XML_PARSER_END_TAG:
10036 if (avail < 2)
10037 goto done;
10038 if (!terminate) {
10039 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010040 /* > can be found unescaped in attribute values */
10041 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010042 goto done;
10043 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10044 goto done;
10045 }
10046 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010047 if (ctxt->sax2) {
10048 xmlParseEndTag2(ctxt,
10049 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10050 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010051 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010052 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010053 }
10054#ifdef LIBXML_SAX1_ENABLED
10055 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010056 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010057#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010058 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010059 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010060 } else {
10061 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010062 }
10063 break;
10064 case XML_PARSER_CDATA_SECTION: {
10065 /*
10066 * The Push mode need to have the SAX callback for
10067 * cdataBlock merge back contiguous callbacks.
10068 */
10069 int base;
10070
10071 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10072 if (base < 0) {
10073 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010074 int tmp;
10075
10076 tmp = xmlCheckCdataPush(ctxt->input->cur,
10077 XML_PARSER_BIG_BUFFER_SIZE);
10078 if (tmp < 0) {
10079 tmp = -tmp;
10080 ctxt->input->cur += tmp;
10081 goto encoding_error;
10082 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010083 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10084 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010085 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010086 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010087 else if (ctxt->sax->characters != NULL)
10088 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010089 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010090 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010091 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010092 ctxt->checkIndex = 0;
10093 }
10094 goto done;
10095 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010096 int tmp;
10097
10098 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10099 if ((tmp < 0) || (tmp != base)) {
10100 tmp = -tmp;
10101 ctxt->input->cur += tmp;
10102 goto encoding_error;
10103 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010104 if ((ctxt->sax != NULL) && (base > 0) &&
10105 (!ctxt->disableSAX)) {
10106 if (ctxt->sax->cdataBlock != NULL)
10107 ctxt->sax->cdataBlock(ctxt->userData,
10108 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010109 else if (ctxt->sax->characters != NULL)
10110 ctxt->sax->characters(ctxt->userData,
10111 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010112 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010113 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010114 ctxt->checkIndex = 0;
10115 ctxt->instate = XML_PARSER_CONTENT;
10116#ifdef DEBUG_PUSH
10117 xmlGenericError(xmlGenericErrorContext,
10118 "PP: entering CONTENT\n");
10119#endif
10120 }
10121 break;
10122 }
Owen Taylor3473f882001-02-23 17:55:21 +000010123 case XML_PARSER_MISC:
10124 SKIP_BLANKS;
10125 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010126 avail = ctxt->input->length -
10127 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010128 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010129 avail = ctxt->input->buf->buffer->use -
10130 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010131 if (avail < 2)
10132 goto done;
10133 cur = ctxt->input->cur[0];
10134 next = ctxt->input->cur[1];
10135 if ((cur == '<') && (next == '?')) {
10136 if ((!terminate) &&
10137 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10138 goto done;
10139#ifdef DEBUG_PUSH
10140 xmlGenericError(xmlGenericErrorContext,
10141 "PP: Parsing PI\n");
10142#endif
10143 xmlParsePI(ctxt);
10144 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010145 (ctxt->input->cur[2] == '-') &&
10146 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010147 if ((!terminate) &&
10148 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10149 goto done;
10150#ifdef DEBUG_PUSH
10151 xmlGenericError(xmlGenericErrorContext,
10152 "PP: Parsing Comment\n");
10153#endif
10154 xmlParseComment(ctxt);
10155 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010156 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010157 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010158 (ctxt->input->cur[2] == 'D') &&
10159 (ctxt->input->cur[3] == 'O') &&
10160 (ctxt->input->cur[4] == 'C') &&
10161 (ctxt->input->cur[5] == 'T') &&
10162 (ctxt->input->cur[6] == 'Y') &&
10163 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010164 (ctxt->input->cur[8] == 'E')) {
10165 if ((!terminate) &&
10166 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10167 goto done;
10168#ifdef DEBUG_PUSH
10169 xmlGenericError(xmlGenericErrorContext,
10170 "PP: Parsing internal subset\n");
10171#endif
10172 ctxt->inSubset = 1;
10173 xmlParseDocTypeDecl(ctxt);
10174 if (RAW == '[') {
10175 ctxt->instate = XML_PARSER_DTD;
10176#ifdef DEBUG_PUSH
10177 xmlGenericError(xmlGenericErrorContext,
10178 "PP: entering DTD\n");
10179#endif
10180 } else {
10181 /*
10182 * Create and update the external subset.
10183 */
10184 ctxt->inSubset = 2;
10185 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10186 (ctxt->sax->externalSubset != NULL))
10187 ctxt->sax->externalSubset(ctxt->userData,
10188 ctxt->intSubName, ctxt->extSubSystem,
10189 ctxt->extSubURI);
10190 ctxt->inSubset = 0;
10191 ctxt->instate = XML_PARSER_PROLOG;
10192#ifdef DEBUG_PUSH
10193 xmlGenericError(xmlGenericErrorContext,
10194 "PP: entering PROLOG\n");
10195#endif
10196 }
10197 } else if ((cur == '<') && (next == '!') &&
10198 (avail < 9)) {
10199 goto done;
10200 } else {
10201 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010202 ctxt->progressive = 1;
10203 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010204#ifdef DEBUG_PUSH
10205 xmlGenericError(xmlGenericErrorContext,
10206 "PP: entering START_TAG\n");
10207#endif
10208 }
10209 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010210 case XML_PARSER_PROLOG:
10211 SKIP_BLANKS;
10212 if (ctxt->input->buf == NULL)
10213 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10214 else
10215 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10216 if (avail < 2)
10217 goto done;
10218 cur = ctxt->input->cur[0];
10219 next = ctxt->input->cur[1];
10220 if ((cur == '<') && (next == '?')) {
10221 if ((!terminate) &&
10222 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10223 goto done;
10224#ifdef DEBUG_PUSH
10225 xmlGenericError(xmlGenericErrorContext,
10226 "PP: Parsing PI\n");
10227#endif
10228 xmlParsePI(ctxt);
10229 } else if ((cur == '<') && (next == '!') &&
10230 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10231 if ((!terminate) &&
10232 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10233 goto done;
10234#ifdef DEBUG_PUSH
10235 xmlGenericError(xmlGenericErrorContext,
10236 "PP: Parsing Comment\n");
10237#endif
10238 xmlParseComment(ctxt);
10239 ctxt->instate = XML_PARSER_PROLOG;
10240 } else if ((cur == '<') && (next == '!') &&
10241 (avail < 4)) {
10242 goto done;
10243 } else {
10244 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010245 if (ctxt->progressive == 0)
10246 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010247 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010248#ifdef DEBUG_PUSH
10249 xmlGenericError(xmlGenericErrorContext,
10250 "PP: entering START_TAG\n");
10251#endif
10252 }
10253 break;
10254 case XML_PARSER_EPILOG:
10255 SKIP_BLANKS;
10256 if (ctxt->input->buf == NULL)
10257 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10258 else
10259 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10260 if (avail < 2)
10261 goto done;
10262 cur = ctxt->input->cur[0];
10263 next = ctxt->input->cur[1];
10264 if ((cur == '<') && (next == '?')) {
10265 if ((!terminate) &&
10266 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10267 goto done;
10268#ifdef DEBUG_PUSH
10269 xmlGenericError(xmlGenericErrorContext,
10270 "PP: Parsing PI\n");
10271#endif
10272 xmlParsePI(ctxt);
10273 ctxt->instate = XML_PARSER_EPILOG;
10274 } else if ((cur == '<') && (next == '!') &&
10275 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10276 if ((!terminate) &&
10277 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10278 goto done;
10279#ifdef DEBUG_PUSH
10280 xmlGenericError(xmlGenericErrorContext,
10281 "PP: Parsing Comment\n");
10282#endif
10283 xmlParseComment(ctxt);
10284 ctxt->instate = XML_PARSER_EPILOG;
10285 } else if ((cur == '<') && (next == '!') &&
10286 (avail < 4)) {
10287 goto done;
10288 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010289 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010290 ctxt->instate = XML_PARSER_EOF;
10291#ifdef DEBUG_PUSH
10292 xmlGenericError(xmlGenericErrorContext,
10293 "PP: entering EOF\n");
10294#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010295 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010296 ctxt->sax->endDocument(ctxt->userData);
10297 goto done;
10298 }
10299 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010300 case XML_PARSER_DTD: {
10301 /*
10302 * Sorry but progressive parsing of the internal subset
10303 * is not expected to be supported. We first check that
10304 * the full content of the internal subset is available and
10305 * the parsing is launched only at that point.
10306 * Internal subset ends up with "']' S? '>'" in an unescaped
10307 * section and not in a ']]>' sequence which are conditional
10308 * sections (whoever argued to keep that crap in XML deserve
10309 * a place in hell !).
10310 */
10311 int base, i;
10312 xmlChar *buf;
10313 xmlChar quote = 0;
10314
10315 base = ctxt->input->cur - ctxt->input->base;
10316 if (base < 0) return(0);
10317 if (ctxt->checkIndex > base)
10318 base = ctxt->checkIndex;
10319 buf = ctxt->input->buf->buffer->content;
10320 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10321 base++) {
10322 if (quote != 0) {
10323 if (buf[base] == quote)
10324 quote = 0;
10325 continue;
10326 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010327 if ((quote == 0) && (buf[base] == '<')) {
10328 int found = 0;
10329 /* special handling of comments */
10330 if (((unsigned int) base + 4 <
10331 ctxt->input->buf->buffer->use) &&
10332 (buf[base + 1] == '!') &&
10333 (buf[base + 2] == '-') &&
10334 (buf[base + 3] == '-')) {
10335 for (;(unsigned int) base + 3 <
10336 ctxt->input->buf->buffer->use; base++) {
10337 if ((buf[base] == '-') &&
10338 (buf[base + 1] == '-') &&
10339 (buf[base + 2] == '>')) {
10340 found = 1;
10341 base += 2;
10342 break;
10343 }
10344 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010345 if (!found) {
10346#if 0
10347 fprintf(stderr, "unfinished comment\n");
10348#endif
10349 break; /* for */
10350 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010351 continue;
10352 }
10353 }
Owen Taylor3473f882001-02-23 17:55:21 +000010354 if (buf[base] == '"') {
10355 quote = '"';
10356 continue;
10357 }
10358 if (buf[base] == '\'') {
10359 quote = '\'';
10360 continue;
10361 }
10362 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010363#if 0
10364 fprintf(stderr, "%c%c%c%c: ", buf[base],
10365 buf[base + 1], buf[base + 2], buf[base + 3]);
10366#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010367 if ((unsigned int) base +1 >=
10368 ctxt->input->buf->buffer->use)
10369 break;
10370 if (buf[base + 1] == ']') {
10371 /* conditional crap, skip both ']' ! */
10372 base++;
10373 continue;
10374 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010375 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010376 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10377 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010378 if (buf[base + i] == '>') {
10379#if 0
10380 fprintf(stderr, "found\n");
10381#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010382 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010383 }
10384 if (!IS_BLANK_CH(buf[base + i])) {
10385#if 0
10386 fprintf(stderr, "not found\n");
10387#endif
10388 goto not_end_of_int_subset;
10389 }
Owen Taylor3473f882001-02-23 17:55:21 +000010390 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010391#if 0
10392 fprintf(stderr, "end of stream\n");
10393#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010394 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010395
Owen Taylor3473f882001-02-23 17:55:21 +000010396 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010397not_end_of_int_subset:
10398 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010399 }
10400 /*
10401 * We didn't found the end of the Internal subset
10402 */
Owen Taylor3473f882001-02-23 17:55:21 +000010403#ifdef DEBUG_PUSH
10404 if (next == 0)
10405 xmlGenericError(xmlGenericErrorContext,
10406 "PP: lookup of int subset end filed\n");
10407#endif
10408 goto done;
10409
10410found_end_int_subset:
10411 xmlParseInternalSubset(ctxt);
10412 ctxt->inSubset = 2;
10413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10414 (ctxt->sax->externalSubset != NULL))
10415 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10416 ctxt->extSubSystem, ctxt->extSubURI);
10417 ctxt->inSubset = 0;
10418 ctxt->instate = XML_PARSER_PROLOG;
10419 ctxt->checkIndex = 0;
10420#ifdef DEBUG_PUSH
10421 xmlGenericError(xmlGenericErrorContext,
10422 "PP: entering PROLOG\n");
10423#endif
10424 break;
10425 }
10426 case XML_PARSER_COMMENT:
10427 xmlGenericError(xmlGenericErrorContext,
10428 "PP: internal error, state == COMMENT\n");
10429 ctxt->instate = XML_PARSER_CONTENT;
10430#ifdef DEBUG_PUSH
10431 xmlGenericError(xmlGenericErrorContext,
10432 "PP: entering CONTENT\n");
10433#endif
10434 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010435 case XML_PARSER_IGNORE:
10436 xmlGenericError(xmlGenericErrorContext,
10437 "PP: internal error, state == IGNORE");
10438 ctxt->instate = XML_PARSER_DTD;
10439#ifdef DEBUG_PUSH
10440 xmlGenericError(xmlGenericErrorContext,
10441 "PP: entering DTD\n");
10442#endif
10443 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010444 case XML_PARSER_PI:
10445 xmlGenericError(xmlGenericErrorContext,
10446 "PP: internal error, state == PI\n");
10447 ctxt->instate = XML_PARSER_CONTENT;
10448#ifdef DEBUG_PUSH
10449 xmlGenericError(xmlGenericErrorContext,
10450 "PP: entering CONTENT\n");
10451#endif
10452 break;
10453 case XML_PARSER_ENTITY_DECL:
10454 xmlGenericError(xmlGenericErrorContext,
10455 "PP: internal error, state == ENTITY_DECL\n");
10456 ctxt->instate = XML_PARSER_DTD;
10457#ifdef DEBUG_PUSH
10458 xmlGenericError(xmlGenericErrorContext,
10459 "PP: entering DTD\n");
10460#endif
10461 break;
10462 case XML_PARSER_ENTITY_VALUE:
10463 xmlGenericError(xmlGenericErrorContext,
10464 "PP: internal error, state == ENTITY_VALUE\n");
10465 ctxt->instate = XML_PARSER_CONTENT;
10466#ifdef DEBUG_PUSH
10467 xmlGenericError(xmlGenericErrorContext,
10468 "PP: entering DTD\n");
10469#endif
10470 break;
10471 case XML_PARSER_ATTRIBUTE_VALUE:
10472 xmlGenericError(xmlGenericErrorContext,
10473 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10474 ctxt->instate = XML_PARSER_START_TAG;
10475#ifdef DEBUG_PUSH
10476 xmlGenericError(xmlGenericErrorContext,
10477 "PP: entering START_TAG\n");
10478#endif
10479 break;
10480 case XML_PARSER_SYSTEM_LITERAL:
10481 xmlGenericError(xmlGenericErrorContext,
10482 "PP: internal error, state == SYSTEM_LITERAL\n");
10483 ctxt->instate = XML_PARSER_START_TAG;
10484#ifdef DEBUG_PUSH
10485 xmlGenericError(xmlGenericErrorContext,
10486 "PP: entering START_TAG\n");
10487#endif
10488 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010489 case XML_PARSER_PUBLIC_LITERAL:
10490 xmlGenericError(xmlGenericErrorContext,
10491 "PP: internal error, state == PUBLIC_LITERAL\n");
10492 ctxt->instate = XML_PARSER_START_TAG;
10493#ifdef DEBUG_PUSH
10494 xmlGenericError(xmlGenericErrorContext,
10495 "PP: entering START_TAG\n");
10496#endif
10497 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010498 }
10499 }
10500done:
10501#ifdef DEBUG_PUSH
10502 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10503#endif
10504 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010505encoding_error:
10506 {
10507 char buffer[150];
10508
10509 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10510 ctxt->input->cur[0], ctxt->input->cur[1],
10511 ctxt->input->cur[2], ctxt->input->cur[3]);
10512 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10513 "Input is not proper UTF-8, indicate encoding !\n%s",
10514 BAD_CAST buffer, NULL);
10515 }
10516 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010517}
10518
10519/**
Owen Taylor3473f882001-02-23 17:55:21 +000010520 * xmlParseChunk:
10521 * @ctxt: an XML parser context
10522 * @chunk: an char array
10523 * @size: the size in byte of the chunk
10524 * @terminate: last chunk indicator
10525 *
10526 * Parse a Chunk of memory
10527 *
10528 * Returns zero if no error, the xmlParserErrors otherwise.
10529 */
10530int
10531xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10532 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010533 int end_in_lf = 0;
10534
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010535 if (ctxt == NULL)
10536 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010537 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010538 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010539 if (ctxt->instate == XML_PARSER_START)
10540 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010541 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10542 (chunk[size - 1] == '\r')) {
10543 end_in_lf = 1;
10544 size--;
10545 }
Owen Taylor3473f882001-02-23 17:55:21 +000010546 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10547 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10548 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10549 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010550 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010551
William M. Bracka3215c72004-07-31 16:24:01 +000010552 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10553 if (res < 0) {
10554 ctxt->errNo = XML_PARSER_EOF;
10555 ctxt->disableSAX = 1;
10556 return (XML_PARSER_EOF);
10557 }
Owen Taylor3473f882001-02-23 17:55:21 +000010558 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10559 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010560 ctxt->input->end =
10561 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010562#ifdef DEBUG_PUSH
10563 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10564#endif
10565
Owen Taylor3473f882001-02-23 17:55:21 +000010566 } else if (ctxt->instate != XML_PARSER_EOF) {
10567 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10568 xmlParserInputBufferPtr in = ctxt->input->buf;
10569 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10570 (in->raw != NULL)) {
10571 int nbchars;
10572
10573 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10574 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010575 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010576 xmlGenericError(xmlGenericErrorContext,
10577 "xmlParseChunk: encoder error\n");
10578 return(XML_ERR_INVALID_ENCODING);
10579 }
10580 }
10581 }
10582 }
10583 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010584 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10585 (ctxt->input->buf != NULL)) {
10586 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10587 }
Daniel Veillard14412512005-01-21 23:53:26 +000010588 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010589 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010590 if (terminate) {
10591 /*
10592 * Check for termination
10593 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010594 int avail = 0;
10595
10596 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010597 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010598 avail = ctxt->input->length -
10599 (ctxt->input->cur - ctxt->input->base);
10600 else
10601 avail = ctxt->input->buf->buffer->use -
10602 (ctxt->input->cur - ctxt->input->base);
10603 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010604
Owen Taylor3473f882001-02-23 17:55:21 +000010605 if ((ctxt->instate != XML_PARSER_EOF) &&
10606 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010607 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010608 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010609 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010610 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010611 }
Owen Taylor3473f882001-02-23 17:55:21 +000010612 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010613 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010614 ctxt->sax->endDocument(ctxt->userData);
10615 }
10616 ctxt->instate = XML_PARSER_EOF;
10617 }
10618 return((xmlParserErrors) ctxt->errNo);
10619}
10620
10621/************************************************************************
10622 * *
10623 * I/O front end functions to the parser *
10624 * *
10625 ************************************************************************/
10626
10627/**
Owen Taylor3473f882001-02-23 17:55:21 +000010628 * xmlCreatePushParserCtxt:
10629 * @sax: a SAX handler
10630 * @user_data: The user data returned on SAX callbacks
10631 * @chunk: a pointer to an array of chars
10632 * @size: number of chars in the array
10633 * @filename: an optional file name or URI
10634 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010635 * Create a parser context for using the XML parser in push mode.
10636 * If @buffer and @size are non-NULL, the data is used to detect
10637 * the encoding. The remaining characters will be parsed so they
10638 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010639 * To allow content encoding detection, @size should be >= 4
10640 * The value of @filename is used for fetching external entities
10641 * and error/warning reports.
10642 *
10643 * Returns the new parser context or NULL
10644 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010645
Owen Taylor3473f882001-02-23 17:55:21 +000010646xmlParserCtxtPtr
10647xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10648 const char *chunk, int size, const char *filename) {
10649 xmlParserCtxtPtr ctxt;
10650 xmlParserInputPtr inputStream;
10651 xmlParserInputBufferPtr buf;
10652 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10653
10654 /*
10655 * plug some encoding conversion routines
10656 */
10657 if ((chunk != NULL) && (size >= 4))
10658 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10659
10660 buf = xmlAllocParserInputBuffer(enc);
10661 if (buf == NULL) return(NULL);
10662
10663 ctxt = xmlNewParserCtxt();
10664 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010665 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010666 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010667 return(NULL);
10668 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010669 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010670 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10671 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010672 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010673 xmlFreeParserInputBuffer(buf);
10674 xmlFreeParserCtxt(ctxt);
10675 return(NULL);
10676 }
Owen Taylor3473f882001-02-23 17:55:21 +000010677 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010678#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010679 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010680#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010681 xmlFree(ctxt->sax);
10682 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10683 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010684 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010685 xmlFreeParserInputBuffer(buf);
10686 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010687 return(NULL);
10688 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010689 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10690 if (sax->initialized == XML_SAX2_MAGIC)
10691 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10692 else
10693 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010694 if (user_data != NULL)
10695 ctxt->userData = user_data;
10696 }
10697 if (filename == NULL) {
10698 ctxt->directory = NULL;
10699 } else {
10700 ctxt->directory = xmlParserGetDirectory(filename);
10701 }
10702
10703 inputStream = xmlNewInputStream(ctxt);
10704 if (inputStream == NULL) {
10705 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010706 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010707 return(NULL);
10708 }
10709
10710 if (filename == NULL)
10711 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010712 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010713 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010714 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010715 if (inputStream->filename == NULL) {
10716 xmlFreeParserCtxt(ctxt);
10717 xmlFreeParserInputBuffer(buf);
10718 return(NULL);
10719 }
10720 }
Owen Taylor3473f882001-02-23 17:55:21 +000010721 inputStream->buf = buf;
10722 inputStream->base = inputStream->buf->buffer->content;
10723 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010724 inputStream->end =
10725 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010726
10727 inputPush(ctxt, inputStream);
10728
William M. Brack3a1cd212005-02-11 14:35:54 +000010729 /*
10730 * If the caller didn't provide an initial 'chunk' for determining
10731 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10732 * that it can be automatically determined later
10733 */
10734 if ((size == 0) || (chunk == NULL)) {
10735 ctxt->charset = XML_CHAR_ENCODING_NONE;
10736 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010737 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10738 int cur = ctxt->input->cur - ctxt->input->base;
10739
Owen Taylor3473f882001-02-23 17:55:21 +000010740 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010741
10742 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10743 ctxt->input->cur = ctxt->input->base + cur;
10744 ctxt->input->end =
10745 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010746#ifdef DEBUG_PUSH
10747 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10748#endif
10749 }
10750
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010751 if (enc != XML_CHAR_ENCODING_NONE) {
10752 xmlSwitchEncoding(ctxt, enc);
10753 }
10754
Owen Taylor3473f882001-02-23 17:55:21 +000010755 return(ctxt);
10756}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010757#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010758
10759/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010760 * xmlStopParser:
10761 * @ctxt: an XML parser context
10762 *
10763 * Blocks further parser processing
10764 */
10765void
10766xmlStopParser(xmlParserCtxtPtr ctxt) {
10767 if (ctxt == NULL)
10768 return;
10769 ctxt->instate = XML_PARSER_EOF;
10770 ctxt->disableSAX = 1;
10771 if (ctxt->input != NULL) {
10772 ctxt->input->cur = BAD_CAST"";
10773 ctxt->input->base = ctxt->input->cur;
10774 }
10775}
10776
10777/**
Owen Taylor3473f882001-02-23 17:55:21 +000010778 * xmlCreateIOParserCtxt:
10779 * @sax: a SAX handler
10780 * @user_data: The user data returned on SAX callbacks
10781 * @ioread: an I/O read function
10782 * @ioclose: an I/O close function
10783 * @ioctx: an I/O handler
10784 * @enc: the charset encoding if known
10785 *
10786 * Create a parser context for using the XML parser with an existing
10787 * I/O stream
10788 *
10789 * Returns the new parser context or NULL
10790 */
10791xmlParserCtxtPtr
10792xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10793 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10794 void *ioctx, xmlCharEncoding enc) {
10795 xmlParserCtxtPtr ctxt;
10796 xmlParserInputPtr inputStream;
10797 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010798
10799 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010800
10801 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10802 if (buf == NULL) return(NULL);
10803
10804 ctxt = xmlNewParserCtxt();
10805 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010806 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010807 return(NULL);
10808 }
10809 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010810#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010811 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010812#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010813 xmlFree(ctxt->sax);
10814 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10815 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010816 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010817 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010818 return(NULL);
10819 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010820 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10821 if (sax->initialized == XML_SAX2_MAGIC)
10822 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10823 else
10824 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010825 if (user_data != NULL)
10826 ctxt->userData = user_data;
10827 }
10828
10829 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10830 if (inputStream == NULL) {
10831 xmlFreeParserCtxt(ctxt);
10832 return(NULL);
10833 }
10834 inputPush(ctxt, inputStream);
10835
10836 return(ctxt);
10837}
10838
Daniel Veillard4432df22003-09-28 18:58:27 +000010839#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010840/************************************************************************
10841 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010842 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010843 * *
10844 ************************************************************************/
10845
10846/**
10847 * xmlIOParseDTD:
10848 * @sax: the SAX handler block or NULL
10849 * @input: an Input Buffer
10850 * @enc: the charset encoding if known
10851 *
10852 * Load and parse a DTD
10853 *
10854 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010855 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010856 */
10857
10858xmlDtdPtr
10859xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10860 xmlCharEncoding enc) {
10861 xmlDtdPtr ret = NULL;
10862 xmlParserCtxtPtr ctxt;
10863 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010864 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010865
10866 if (input == NULL)
10867 return(NULL);
10868
10869 ctxt = xmlNewParserCtxt();
10870 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010871 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010872 return(NULL);
10873 }
10874
10875 /*
10876 * Set-up the SAX context
10877 */
10878 if (sax != NULL) {
10879 if (ctxt->sax != NULL)
10880 xmlFree(ctxt->sax);
10881 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010882 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010883 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010884 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010885
10886 /*
10887 * generate a parser input from the I/O handler
10888 */
10889
Daniel Veillard43caefb2003-12-07 19:32:22 +000010890 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010891 if (pinput == NULL) {
10892 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010893 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010894 xmlFreeParserCtxt(ctxt);
10895 return(NULL);
10896 }
10897
10898 /*
10899 * plug some encoding conversion routines here.
10900 */
10901 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010902 if (enc != XML_CHAR_ENCODING_NONE) {
10903 xmlSwitchEncoding(ctxt, enc);
10904 }
Owen Taylor3473f882001-02-23 17:55:21 +000010905
10906 pinput->filename = NULL;
10907 pinput->line = 1;
10908 pinput->col = 1;
10909 pinput->base = ctxt->input->cur;
10910 pinput->cur = ctxt->input->cur;
10911 pinput->free = NULL;
10912
10913 /*
10914 * let's parse that entity knowing it's an external subset.
10915 */
10916 ctxt->inSubset = 2;
10917 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10918 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10919 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010920
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010921 if ((enc == XML_CHAR_ENCODING_NONE) &&
10922 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010923 /*
10924 * Get the 4 first bytes and decode the charset
10925 * if enc != XML_CHAR_ENCODING_NONE
10926 * plug some encoding conversion routines.
10927 */
10928 start[0] = RAW;
10929 start[1] = NXT(1);
10930 start[2] = NXT(2);
10931 start[3] = NXT(3);
10932 enc = xmlDetectCharEncoding(start, 4);
10933 if (enc != XML_CHAR_ENCODING_NONE) {
10934 xmlSwitchEncoding(ctxt, enc);
10935 }
10936 }
10937
Owen Taylor3473f882001-02-23 17:55:21 +000010938 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10939
10940 if (ctxt->myDoc != NULL) {
10941 if (ctxt->wellFormed) {
10942 ret = ctxt->myDoc->extSubset;
10943 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010944 if (ret != NULL) {
10945 xmlNodePtr tmp;
10946
10947 ret->doc = NULL;
10948 tmp = ret->children;
10949 while (tmp != NULL) {
10950 tmp->doc = NULL;
10951 tmp = tmp->next;
10952 }
10953 }
Owen Taylor3473f882001-02-23 17:55:21 +000010954 } else {
10955 ret = NULL;
10956 }
10957 xmlFreeDoc(ctxt->myDoc);
10958 ctxt->myDoc = NULL;
10959 }
10960 if (sax != NULL) ctxt->sax = NULL;
10961 xmlFreeParserCtxt(ctxt);
10962
10963 return(ret);
10964}
10965
10966/**
10967 * xmlSAXParseDTD:
10968 * @sax: the SAX handler block
10969 * @ExternalID: a NAME* containing the External ID of the DTD
10970 * @SystemID: a NAME* containing the URL to the DTD
10971 *
10972 * Load and parse an external subset.
10973 *
10974 * Returns the resulting xmlDtdPtr or NULL in case of error.
10975 */
10976
10977xmlDtdPtr
10978xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10979 const xmlChar *SystemID) {
10980 xmlDtdPtr ret = NULL;
10981 xmlParserCtxtPtr ctxt;
10982 xmlParserInputPtr input = NULL;
10983 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010984 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010985
10986 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10987
10988 ctxt = xmlNewParserCtxt();
10989 if (ctxt == NULL) {
10990 return(NULL);
10991 }
10992
10993 /*
10994 * Set-up the SAX context
10995 */
10996 if (sax != NULL) {
10997 if (ctxt->sax != NULL)
10998 xmlFree(ctxt->sax);
10999 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011000 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011001 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011002
11003 /*
11004 * Canonicalise the system ID
11005 */
11006 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011007 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011008 xmlFreeParserCtxt(ctxt);
11009 return(NULL);
11010 }
Owen Taylor3473f882001-02-23 17:55:21 +000011011
11012 /*
11013 * Ask the Entity resolver to load the damn thing
11014 */
11015
11016 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011017 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11018 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011019 if (input == NULL) {
11020 if (sax != NULL) ctxt->sax = NULL;
11021 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011022 if (systemIdCanonic != NULL)
11023 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011024 return(NULL);
11025 }
11026
11027 /*
11028 * plug some encoding conversion routines here.
11029 */
11030 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011031 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11032 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11033 xmlSwitchEncoding(ctxt, enc);
11034 }
Owen Taylor3473f882001-02-23 17:55:21 +000011035
11036 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011037 input->filename = (char *) systemIdCanonic;
11038 else
11039 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011040 input->line = 1;
11041 input->col = 1;
11042 input->base = ctxt->input->cur;
11043 input->cur = ctxt->input->cur;
11044 input->free = NULL;
11045
11046 /*
11047 * let's parse that entity knowing it's an external subset.
11048 */
11049 ctxt->inSubset = 2;
11050 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11051 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11052 ExternalID, SystemID);
11053 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11054
11055 if (ctxt->myDoc != NULL) {
11056 if (ctxt->wellFormed) {
11057 ret = ctxt->myDoc->extSubset;
11058 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011059 if (ret != NULL) {
11060 xmlNodePtr tmp;
11061
11062 ret->doc = NULL;
11063 tmp = ret->children;
11064 while (tmp != NULL) {
11065 tmp->doc = NULL;
11066 tmp = tmp->next;
11067 }
11068 }
Owen Taylor3473f882001-02-23 17:55:21 +000011069 } else {
11070 ret = NULL;
11071 }
11072 xmlFreeDoc(ctxt->myDoc);
11073 ctxt->myDoc = NULL;
11074 }
11075 if (sax != NULL) ctxt->sax = NULL;
11076 xmlFreeParserCtxt(ctxt);
11077
11078 return(ret);
11079}
11080
Daniel Veillard4432df22003-09-28 18:58:27 +000011081
Owen Taylor3473f882001-02-23 17:55:21 +000011082/**
11083 * xmlParseDTD:
11084 * @ExternalID: a NAME* containing the External ID of the DTD
11085 * @SystemID: a NAME* containing the URL to the DTD
11086 *
11087 * Load and parse an external subset.
11088 *
11089 * Returns the resulting xmlDtdPtr or NULL in case of error.
11090 */
11091
11092xmlDtdPtr
11093xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11094 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11095}
Daniel Veillard4432df22003-09-28 18:58:27 +000011096#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011097
11098/************************************************************************
11099 * *
11100 * Front ends when parsing an Entity *
11101 * *
11102 ************************************************************************/
11103
11104/**
Owen Taylor3473f882001-02-23 17:55:21 +000011105 * xmlParseCtxtExternalEntity:
11106 * @ctx: the existing parsing context
11107 * @URL: the URL for the entity to load
11108 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011109 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011110 *
11111 * Parse an external general entity within an existing parsing context
11112 * An external general parsed entity is well-formed if it matches the
11113 * production labeled extParsedEnt.
11114 *
11115 * [78] extParsedEnt ::= TextDecl? content
11116 *
11117 * Returns 0 if the entity is well formed, -1 in case of args problem and
11118 * the parser error code otherwise
11119 */
11120
11121int
11122xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011123 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011124 xmlParserCtxtPtr ctxt;
11125 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011126 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011127 xmlSAXHandlerPtr oldsax = NULL;
11128 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011129 xmlChar start[4];
11130 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011131 xmlParserInputPtr inputStream;
11132 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011133
Daniel Veillardce682bc2004-11-05 17:22:25 +000011134 if (ctx == NULL) return(-1);
11135
Owen Taylor3473f882001-02-23 17:55:21 +000011136 if (ctx->depth > 40) {
11137 return(XML_ERR_ENTITY_LOOP);
11138 }
11139
Daniel Veillardcda96922001-08-21 10:56:31 +000011140 if (lst != NULL)
11141 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011142 if ((URL == NULL) && (ID == NULL))
11143 return(-1);
11144 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11145 return(-1);
11146
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011147 ctxt = xmlNewParserCtxt();
11148 if (ctxt == NULL) {
11149 return(-1);
11150 }
11151
Owen Taylor3473f882001-02-23 17:55:21 +000011152 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011153 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011154
11155 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11156 if (inputStream == NULL) {
11157 xmlFreeParserCtxt(ctxt);
11158 return(-1);
11159 }
11160
11161 inputPush(ctxt, inputStream);
11162
11163 if ((ctxt->directory == NULL) && (directory == NULL))
11164 directory = xmlParserGetDirectory((char *)URL);
11165 if ((ctxt->directory == NULL) && (directory != NULL))
11166 ctxt->directory = directory;
11167
Owen Taylor3473f882001-02-23 17:55:21 +000011168 oldsax = ctxt->sax;
11169 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011170 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011171 newDoc = xmlNewDoc(BAD_CAST "1.0");
11172 if (newDoc == NULL) {
11173 xmlFreeParserCtxt(ctxt);
11174 return(-1);
11175 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011176 if (ctx->myDoc->dict) {
11177 newDoc->dict = ctx->myDoc->dict;
11178 xmlDictReference(newDoc->dict);
11179 }
Owen Taylor3473f882001-02-23 17:55:21 +000011180 if (ctx->myDoc != NULL) {
11181 newDoc->intSubset = ctx->myDoc->intSubset;
11182 newDoc->extSubset = ctx->myDoc->extSubset;
11183 }
11184 if (ctx->myDoc->URL != NULL) {
11185 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11186 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011187 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11188 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011189 ctxt->sax = oldsax;
11190 xmlFreeParserCtxt(ctxt);
11191 newDoc->intSubset = NULL;
11192 newDoc->extSubset = NULL;
11193 xmlFreeDoc(newDoc);
11194 return(-1);
11195 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011196 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011197 nodePush(ctxt, newDoc->children);
11198 if (ctx->myDoc == NULL) {
11199 ctxt->myDoc = newDoc;
11200 } else {
11201 ctxt->myDoc = ctx->myDoc;
11202 newDoc->children->doc = ctx->myDoc;
11203 }
11204
Daniel Veillard87a764e2001-06-20 17:41:10 +000011205 /*
11206 * Get the 4 first bytes and decode the charset
11207 * if enc != XML_CHAR_ENCODING_NONE
11208 * plug some encoding conversion routines.
11209 */
11210 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011211 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11212 start[0] = RAW;
11213 start[1] = NXT(1);
11214 start[2] = NXT(2);
11215 start[3] = NXT(3);
11216 enc = xmlDetectCharEncoding(start, 4);
11217 if (enc != XML_CHAR_ENCODING_NONE) {
11218 xmlSwitchEncoding(ctxt, enc);
11219 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011220 }
11221
Owen Taylor3473f882001-02-23 17:55:21 +000011222 /*
11223 * Parse a possible text declaration first
11224 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011225 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011226 xmlParseTextDecl(ctxt);
11227 }
11228
11229 /*
11230 * Doing validity checking on chunk doesn't make sense
11231 */
11232 ctxt->instate = XML_PARSER_CONTENT;
11233 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011234 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011235 ctxt->loadsubset = ctx->loadsubset;
11236 ctxt->depth = ctx->depth + 1;
11237 ctxt->replaceEntities = ctx->replaceEntities;
11238 if (ctxt->validate) {
11239 ctxt->vctxt.error = ctx->vctxt.error;
11240 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011241 } else {
11242 ctxt->vctxt.error = NULL;
11243 ctxt->vctxt.warning = NULL;
11244 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011245 ctxt->vctxt.nodeTab = NULL;
11246 ctxt->vctxt.nodeNr = 0;
11247 ctxt->vctxt.nodeMax = 0;
11248 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011249 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11250 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011251 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11252 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11253 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011254 ctxt->dictNames = ctx->dictNames;
11255 ctxt->attsDefault = ctx->attsDefault;
11256 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011257 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011258
11259 xmlParseContent(ctxt);
11260
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011261 ctx->validate = ctxt->validate;
11262 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011263 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011264 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011265 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011266 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011267 }
11268 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011269 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011270 }
11271
11272 if (!ctxt->wellFormed) {
11273 if (ctxt->errNo == 0)
11274 ret = 1;
11275 else
11276 ret = ctxt->errNo;
11277 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011278 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011279 xmlNodePtr cur;
11280
11281 /*
11282 * Return the newly created nodeset after unlinking it from
11283 * they pseudo parent.
11284 */
11285 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011286 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011287 while (cur != NULL) {
11288 cur->parent = NULL;
11289 cur = cur->next;
11290 }
11291 newDoc->children->children = NULL;
11292 }
11293 ret = 0;
11294 }
11295 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011296 ctxt->dict = NULL;
11297 ctxt->attsDefault = NULL;
11298 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011299 xmlFreeParserCtxt(ctxt);
11300 newDoc->intSubset = NULL;
11301 newDoc->extSubset = NULL;
11302 xmlFreeDoc(newDoc);
11303
11304 return(ret);
11305}
11306
11307/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011308 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011309 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011310 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011311 * @sax: the SAX handler bloc (possibly NULL)
11312 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11313 * @depth: Used for loop detection, use 0
11314 * @URL: the URL for the entity to load
11315 * @ID: the System ID for the entity to load
11316 * @list: the return value for the set of parsed nodes
11317 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011318 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011319 *
11320 * Returns 0 if the entity is well formed, -1 in case of args problem and
11321 * the parser error code otherwise
11322 */
11323
Daniel Veillard7d515752003-09-26 19:12:37 +000011324static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011325xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11326 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011327 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011328 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011329 xmlParserCtxtPtr ctxt;
11330 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011331 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011332 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011333 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011334 xmlChar start[4];
11335 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011336
11337 if (depth > 40) {
11338 return(XML_ERR_ENTITY_LOOP);
11339 }
11340
11341
11342
11343 if (list != NULL)
11344 *list = NULL;
11345 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011346 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011347 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011348 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011349
11350
11351 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011352 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011353 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011354 if (oldctxt != NULL) {
11355 ctxt->_private = oldctxt->_private;
11356 ctxt->loadsubset = oldctxt->loadsubset;
11357 ctxt->validate = oldctxt->validate;
11358 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011359 ctxt->record_info = oldctxt->record_info;
11360 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11361 ctxt->node_seq.length = oldctxt->node_seq.length;
11362 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011363 } else {
11364 /*
11365 * Doing validity checking on chunk without context
11366 * doesn't make sense
11367 */
11368 ctxt->_private = NULL;
11369 ctxt->validate = 0;
11370 ctxt->external = 2;
11371 ctxt->loadsubset = 0;
11372 }
Owen Taylor3473f882001-02-23 17:55:21 +000011373 if (sax != NULL) {
11374 oldsax = ctxt->sax;
11375 ctxt->sax = sax;
11376 if (user_data != NULL)
11377 ctxt->userData = user_data;
11378 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011379 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011380 newDoc = xmlNewDoc(BAD_CAST "1.0");
11381 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011382 ctxt->node_seq.maximum = 0;
11383 ctxt->node_seq.length = 0;
11384 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011385 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011386 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011387 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011388 newDoc->intSubset = doc->intSubset;
11389 newDoc->extSubset = doc->extSubset;
11390 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011391 xmlDictReference(newDoc->dict);
11392
Owen Taylor3473f882001-02-23 17:55:21 +000011393 if (doc->URL != NULL) {
11394 newDoc->URL = xmlStrdup(doc->URL);
11395 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011396 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11397 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011398 if (sax != NULL)
11399 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011400 ctxt->node_seq.maximum = 0;
11401 ctxt->node_seq.length = 0;
11402 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011403 xmlFreeParserCtxt(ctxt);
11404 newDoc->intSubset = NULL;
11405 newDoc->extSubset = NULL;
11406 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011407 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011408 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011409 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011410 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011411 ctxt->myDoc = doc;
11412 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011413
Daniel Veillard87a764e2001-06-20 17:41:10 +000011414 /*
11415 * Get the 4 first bytes and decode the charset
11416 * if enc != XML_CHAR_ENCODING_NONE
11417 * plug some encoding conversion routines.
11418 */
11419 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011420 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11421 start[0] = RAW;
11422 start[1] = NXT(1);
11423 start[2] = NXT(2);
11424 start[3] = NXT(3);
11425 enc = xmlDetectCharEncoding(start, 4);
11426 if (enc != XML_CHAR_ENCODING_NONE) {
11427 xmlSwitchEncoding(ctxt, enc);
11428 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011429 }
11430
Owen Taylor3473f882001-02-23 17:55:21 +000011431 /*
11432 * Parse a possible text declaration first
11433 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011434 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011435 xmlParseTextDecl(ctxt);
11436 }
11437
Owen Taylor3473f882001-02-23 17:55:21 +000011438 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011439 ctxt->depth = depth;
11440
11441 xmlParseContent(ctxt);
11442
Daniel Veillard561b7f82002-03-20 21:55:57 +000011443 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011444 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011445 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011446 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011447 }
11448 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011449 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011450 }
11451
11452 if (!ctxt->wellFormed) {
11453 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011454 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011455 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011456 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011457 } else {
11458 if (list != NULL) {
11459 xmlNodePtr cur;
11460
11461 /*
11462 * Return the newly created nodeset after unlinking it from
11463 * they pseudo parent.
11464 */
11465 cur = newDoc->children->children;
11466 *list = cur;
11467 while (cur != NULL) {
11468 cur->parent = NULL;
11469 cur = cur->next;
11470 }
11471 newDoc->children->children = NULL;
11472 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011473 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011474 }
11475 if (sax != NULL)
11476 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011477 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11478 oldctxt->node_seq.length = ctxt->node_seq.length;
11479 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011480 ctxt->node_seq.maximum = 0;
11481 ctxt->node_seq.length = 0;
11482 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011483 xmlFreeParserCtxt(ctxt);
11484 newDoc->intSubset = NULL;
11485 newDoc->extSubset = NULL;
11486 xmlFreeDoc(newDoc);
11487
11488 return(ret);
11489}
11490
Daniel Veillard81273902003-09-30 00:43:48 +000011491#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011492/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011493 * xmlParseExternalEntity:
11494 * @doc: the document the chunk pertains to
11495 * @sax: the SAX handler bloc (possibly NULL)
11496 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11497 * @depth: Used for loop detection, use 0
11498 * @URL: the URL for the entity to load
11499 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011500 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011501 *
11502 * Parse an external general entity
11503 * An external general parsed entity is well-formed if it matches the
11504 * production labeled extParsedEnt.
11505 *
11506 * [78] extParsedEnt ::= TextDecl? content
11507 *
11508 * Returns 0 if the entity is well formed, -1 in case of args problem and
11509 * the parser error code otherwise
11510 */
11511
11512int
11513xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011514 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011515 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011516 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011517}
11518
11519/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011520 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011521 * @doc: the document the chunk pertains to
11522 * @sax: the SAX handler bloc (possibly NULL)
11523 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11524 * @depth: Used for loop detection, use 0
11525 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011526 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011527 *
11528 * Parse a well-balanced chunk of an XML document
11529 * called by the parser
11530 * The allowed sequence for the Well Balanced Chunk is the one defined by
11531 * the content production in the XML grammar:
11532 *
11533 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11534 *
11535 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11536 * the parser error code otherwise
11537 */
11538
11539int
11540xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011541 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011542 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11543 depth, string, lst, 0 );
11544}
Daniel Veillard81273902003-09-30 00:43:48 +000011545#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011546
11547/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011548 * xmlParseBalancedChunkMemoryInternal:
11549 * @oldctxt: the existing parsing context
11550 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11551 * @user_data: the user data field for the parser context
11552 * @lst: the return value for the set of parsed nodes
11553 *
11554 *
11555 * Parse a well-balanced chunk of an XML document
11556 * called by the parser
11557 * The allowed sequence for the Well Balanced Chunk is the one defined by
11558 * the content production in the XML grammar:
11559 *
11560 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11561 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011562 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11563 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011564 *
11565 * In case recover is set to 1, the nodelist will not be empty even if
11566 * the parsed chunk is not well balanced.
11567 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011568static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011569xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11570 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11571 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011572 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011573 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011574 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011575 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011576 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011577 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011578 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011579
11580 if (oldctxt->depth > 40) {
11581 return(XML_ERR_ENTITY_LOOP);
11582 }
11583
11584
11585 if (lst != NULL)
11586 *lst = NULL;
11587 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011588 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011589
11590 size = xmlStrlen(string);
11591
11592 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011593 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011594 if (user_data != NULL)
11595 ctxt->userData = user_data;
11596 else
11597 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011598 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11599 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011600 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11601 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11602 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011603
11604 oldsax = ctxt->sax;
11605 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011606 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011607 ctxt->replaceEntities = oldctxt->replaceEntities;
11608 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011609
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011610 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011611 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011612 newDoc = xmlNewDoc(BAD_CAST "1.0");
11613 if (newDoc == NULL) {
11614 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011615 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011616 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011617 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011618 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011619 newDoc->dict = ctxt->dict;
11620 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011621 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011622 } else {
11623 ctxt->myDoc = oldctxt->myDoc;
11624 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011625 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011626 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011627 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11628 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011629 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011630 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011631 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011632 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011633 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011634 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011635 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011636 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011637 ctxt->myDoc->children = NULL;
11638 ctxt->myDoc->last = NULL;
11639 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011640 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011641 ctxt->instate = XML_PARSER_CONTENT;
11642 ctxt->depth = oldctxt->depth + 1;
11643
Daniel Veillard328f48c2002-11-15 15:24:34 +000011644 ctxt->validate = 0;
11645 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011646 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11647 /*
11648 * ID/IDREF registration will be done in xmlValidateElement below
11649 */
11650 ctxt->loadsubset |= XML_SKIP_IDS;
11651 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011652 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011653 ctxt->attsDefault = oldctxt->attsDefault;
11654 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011655
Daniel Veillard68e9e742002-11-16 15:35:11 +000011656 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011657 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011658 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011659 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011660 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011661 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011662 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011663 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011664 }
11665
11666 if (!ctxt->wellFormed) {
11667 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011668 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011669 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011670 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011671 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011672 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011673 }
11674
William M. Brack7b9154b2003-09-27 19:23:50 +000011675 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011676 xmlNodePtr cur;
11677
11678 /*
11679 * Return the newly created nodeset after unlinking it from
11680 * they pseudo parent.
11681 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011682 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011683 *lst = cur;
11684 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011685#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011686 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11687 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11688 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011689 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11690 oldctxt->myDoc, cur);
11691 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011692#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011693 cur->parent = NULL;
11694 cur = cur->next;
11695 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011696 ctxt->myDoc->children->children = NULL;
11697 }
11698 if (ctxt->myDoc != NULL) {
11699 xmlFreeNode(ctxt->myDoc->children);
11700 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011701 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011702 }
11703
11704 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011705 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011706 ctxt->attsDefault = NULL;
11707 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011708 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011709 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011710 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011711 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011712
11713 return(ret);
11714}
11715
Daniel Veillard29b17482004-08-16 00:39:03 +000011716/**
11717 * xmlParseInNodeContext:
11718 * @node: the context node
11719 * @data: the input string
11720 * @datalen: the input string length in bytes
11721 * @options: a combination of xmlParserOption
11722 * @lst: the return value for the set of parsed nodes
11723 *
11724 * Parse a well-balanced chunk of an XML document
11725 * within the context (DTD, namespaces, etc ...) of the given node.
11726 *
11727 * The allowed sequence for the data is a Well Balanced Chunk defined by
11728 * the content production in the XML grammar:
11729 *
11730 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11731 *
11732 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11733 * error code otherwise
11734 */
11735xmlParserErrors
11736xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11737 int options, xmlNodePtr *lst) {
11738#ifdef SAX2
11739 xmlParserCtxtPtr ctxt;
11740 xmlDocPtr doc = NULL;
11741 xmlNodePtr fake, cur;
11742 int nsnr = 0;
11743
11744 xmlParserErrors ret = XML_ERR_OK;
11745
11746 /*
11747 * check all input parameters, grab the document
11748 */
11749 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11750 return(XML_ERR_INTERNAL_ERROR);
11751 switch (node->type) {
11752 case XML_ELEMENT_NODE:
11753 case XML_ATTRIBUTE_NODE:
11754 case XML_TEXT_NODE:
11755 case XML_CDATA_SECTION_NODE:
11756 case XML_ENTITY_REF_NODE:
11757 case XML_PI_NODE:
11758 case XML_COMMENT_NODE:
11759 case XML_DOCUMENT_NODE:
11760 case XML_HTML_DOCUMENT_NODE:
11761 break;
11762 default:
11763 return(XML_ERR_INTERNAL_ERROR);
11764
11765 }
11766 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11767 (node->type != XML_DOCUMENT_NODE) &&
11768 (node->type != XML_HTML_DOCUMENT_NODE))
11769 node = node->parent;
11770 if (node == NULL)
11771 return(XML_ERR_INTERNAL_ERROR);
11772 if (node->type == XML_ELEMENT_NODE)
11773 doc = node->doc;
11774 else
11775 doc = (xmlDocPtr) node;
11776 if (doc == NULL)
11777 return(XML_ERR_INTERNAL_ERROR);
11778
11779 /*
11780 * allocate a context and set-up everything not related to the
11781 * node position in the tree
11782 */
11783 if (doc->type == XML_DOCUMENT_NODE)
11784 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11785#ifdef LIBXML_HTML_ENABLED
11786 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11787 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11788#endif
11789 else
11790 return(XML_ERR_INTERNAL_ERROR);
11791
11792 if (ctxt == NULL)
11793 return(XML_ERR_NO_MEMORY);
11794 fake = xmlNewComment(NULL);
11795 if (fake == NULL) {
11796 xmlFreeParserCtxt(ctxt);
11797 return(XML_ERR_NO_MEMORY);
11798 }
11799 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011800
11801 /*
11802 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11803 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11804 * we must wait until the last moment to free the original one.
11805 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011806 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011807 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011808 xmlDictFree(ctxt->dict);
11809 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011810 } else
11811 options |= XML_PARSE_NODICT;
11812
11813 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011814 xmlDetectSAX2(ctxt);
11815 ctxt->myDoc = doc;
11816
11817 if (node->type == XML_ELEMENT_NODE) {
11818 nodePush(ctxt, node);
11819 /*
11820 * initialize the SAX2 namespaces stack
11821 */
11822 cur = node;
11823 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11824 xmlNsPtr ns = cur->nsDef;
11825 const xmlChar *iprefix, *ihref;
11826
11827 while (ns != NULL) {
11828 if (ctxt->dict) {
11829 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11830 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11831 } else {
11832 iprefix = ns->prefix;
11833 ihref = ns->href;
11834 }
11835
11836 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11837 nsPush(ctxt, iprefix, ihref);
11838 nsnr++;
11839 }
11840 ns = ns->next;
11841 }
11842 cur = cur->parent;
11843 }
11844 ctxt->instate = XML_PARSER_CONTENT;
11845 }
11846
11847 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11848 /*
11849 * ID/IDREF registration will be done in xmlValidateElement below
11850 */
11851 ctxt->loadsubset |= XML_SKIP_IDS;
11852 }
11853
Daniel Veillard499cc922006-01-18 17:22:35 +000011854#ifdef LIBXML_HTML_ENABLED
11855 if (doc->type == XML_HTML_DOCUMENT_NODE)
11856 __htmlParseContent(ctxt);
11857 else
11858#endif
11859 xmlParseContent(ctxt);
11860
Daniel Veillard29b17482004-08-16 00:39:03 +000011861 nsPop(ctxt, nsnr);
11862 if ((RAW == '<') && (NXT(1) == '/')) {
11863 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11864 } else if (RAW != 0) {
11865 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11866 }
11867 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11868 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11869 ctxt->wellFormed = 0;
11870 }
11871
11872 if (!ctxt->wellFormed) {
11873 if (ctxt->errNo == 0)
11874 ret = XML_ERR_INTERNAL_ERROR;
11875 else
11876 ret = (xmlParserErrors)ctxt->errNo;
11877 } else {
11878 ret = XML_ERR_OK;
11879 }
11880
11881 /*
11882 * Return the newly created nodeset after unlinking it from
11883 * the pseudo sibling.
11884 */
11885
11886 cur = fake->next;
11887 fake->next = NULL;
11888 node->last = fake;
11889
11890 if (cur != NULL) {
11891 cur->prev = NULL;
11892 }
11893
11894 *lst = cur;
11895
11896 while (cur != NULL) {
11897 cur->parent = NULL;
11898 cur = cur->next;
11899 }
11900
11901 xmlUnlinkNode(fake);
11902 xmlFreeNode(fake);
11903
11904
11905 if (ret != XML_ERR_OK) {
11906 xmlFreeNodeList(*lst);
11907 *lst = NULL;
11908 }
William M. Brackc3f81342004-10-03 01:22:44 +000011909
William M. Brackb7b54de2004-10-06 16:38:01 +000011910 if (doc->dict != NULL)
11911 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011912 xmlFreeParserCtxt(ctxt);
11913
11914 return(ret);
11915#else /* !SAX2 */
11916 return(XML_ERR_INTERNAL_ERROR);
11917#endif
11918}
11919
Daniel Veillard81273902003-09-30 00:43:48 +000011920#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011921/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011922 * xmlParseBalancedChunkMemoryRecover:
11923 * @doc: the document the chunk pertains to
11924 * @sax: the SAX handler bloc (possibly NULL)
11925 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11926 * @depth: Used for loop detection, use 0
11927 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11928 * @lst: the return value for the set of parsed nodes
11929 * @recover: return nodes even if the data is broken (use 0)
11930 *
11931 *
11932 * Parse a well-balanced chunk of an XML document
11933 * called by the parser
11934 * The allowed sequence for the Well Balanced Chunk is the one defined by
11935 * the content production in the XML grammar:
11936 *
11937 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11938 *
11939 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11940 * the parser error code otherwise
11941 *
11942 * In case recover is set to 1, the nodelist will not be empty even if
11943 * the parsed chunk is not well balanced.
11944 */
11945int
11946xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11947 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11948 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011949 xmlParserCtxtPtr ctxt;
11950 xmlDocPtr newDoc;
11951 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011952 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011953 int size;
11954 int ret = 0;
11955
11956 if (depth > 40) {
11957 return(XML_ERR_ENTITY_LOOP);
11958 }
11959
11960
Daniel Veillardcda96922001-08-21 10:56:31 +000011961 if (lst != NULL)
11962 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011963 if (string == NULL)
11964 return(-1);
11965
11966 size = xmlStrlen(string);
11967
11968 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11969 if (ctxt == NULL) return(-1);
11970 ctxt->userData = ctxt;
11971 if (sax != NULL) {
11972 oldsax = ctxt->sax;
11973 ctxt->sax = sax;
11974 if (user_data != NULL)
11975 ctxt->userData = user_data;
11976 }
11977 newDoc = xmlNewDoc(BAD_CAST "1.0");
11978 if (newDoc == NULL) {
11979 xmlFreeParserCtxt(ctxt);
11980 return(-1);
11981 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011982 if ((doc != NULL) && (doc->dict != NULL)) {
11983 xmlDictFree(ctxt->dict);
11984 ctxt->dict = doc->dict;
11985 xmlDictReference(ctxt->dict);
11986 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11987 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11988 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11989 ctxt->dictNames = 1;
11990 } else {
11991 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11992 }
Owen Taylor3473f882001-02-23 17:55:21 +000011993 if (doc != NULL) {
11994 newDoc->intSubset = doc->intSubset;
11995 newDoc->extSubset = doc->extSubset;
11996 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011997 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11998 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011999 if (sax != NULL)
12000 ctxt->sax = oldsax;
12001 xmlFreeParserCtxt(ctxt);
12002 newDoc->intSubset = NULL;
12003 newDoc->extSubset = NULL;
12004 xmlFreeDoc(newDoc);
12005 return(-1);
12006 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012007 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12008 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012009 if (doc == NULL) {
12010 ctxt->myDoc = newDoc;
12011 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012012 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012013 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012014 /* Ensure that doc has XML spec namespace */
12015 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12016 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012017 }
12018 ctxt->instate = XML_PARSER_CONTENT;
12019 ctxt->depth = depth;
12020
12021 /*
12022 * Doing validity checking on chunk doesn't make sense
12023 */
12024 ctxt->validate = 0;
12025 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012026 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012027
Daniel Veillardb39bc392002-10-26 19:29:51 +000012028 if ( doc != NULL ){
12029 content = doc->children;
12030 doc->children = NULL;
12031 xmlParseContent(ctxt);
12032 doc->children = content;
12033 }
12034 else {
12035 xmlParseContent(ctxt);
12036 }
Owen Taylor3473f882001-02-23 17:55:21 +000012037 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012038 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012039 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012040 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012041 }
12042 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012043 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012044 }
12045
12046 if (!ctxt->wellFormed) {
12047 if (ctxt->errNo == 0)
12048 ret = 1;
12049 else
12050 ret = ctxt->errNo;
12051 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012052 ret = 0;
12053 }
12054
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012055 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12056 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012057
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012058 /*
12059 * Return the newly created nodeset after unlinking it from
12060 * they pseudo parent.
12061 */
12062 cur = newDoc->children->children;
12063 *lst = cur;
12064 while (cur != NULL) {
12065 xmlSetTreeDoc(cur, doc);
12066 cur->parent = NULL;
12067 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012068 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012069 newDoc->children->children = NULL;
12070 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012071
Owen Taylor3473f882001-02-23 17:55:21 +000012072 if (sax != NULL)
12073 ctxt->sax = oldsax;
12074 xmlFreeParserCtxt(ctxt);
12075 newDoc->intSubset = NULL;
12076 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012077 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012078 xmlFreeDoc(newDoc);
12079
12080 return(ret);
12081}
12082
12083/**
12084 * xmlSAXParseEntity:
12085 * @sax: the SAX handler block
12086 * @filename: the filename
12087 *
12088 * parse an XML external entity out of context and build a tree.
12089 * It use the given SAX function block to handle the parsing callback.
12090 * If sax is NULL, fallback to the default DOM tree building routines.
12091 *
12092 * [78] extParsedEnt ::= TextDecl? content
12093 *
12094 * This correspond to a "Well Balanced" chunk
12095 *
12096 * Returns the resulting document tree
12097 */
12098
12099xmlDocPtr
12100xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12101 xmlDocPtr ret;
12102 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012103
12104 ctxt = xmlCreateFileParserCtxt(filename);
12105 if (ctxt == NULL) {
12106 return(NULL);
12107 }
12108 if (sax != NULL) {
12109 if (ctxt->sax != NULL)
12110 xmlFree(ctxt->sax);
12111 ctxt->sax = sax;
12112 ctxt->userData = NULL;
12113 }
12114
Owen Taylor3473f882001-02-23 17:55:21 +000012115 xmlParseExtParsedEnt(ctxt);
12116
12117 if (ctxt->wellFormed)
12118 ret = ctxt->myDoc;
12119 else {
12120 ret = NULL;
12121 xmlFreeDoc(ctxt->myDoc);
12122 ctxt->myDoc = NULL;
12123 }
12124 if (sax != NULL)
12125 ctxt->sax = NULL;
12126 xmlFreeParserCtxt(ctxt);
12127
12128 return(ret);
12129}
12130
12131/**
12132 * xmlParseEntity:
12133 * @filename: the filename
12134 *
12135 * parse an XML external entity out of context and build a tree.
12136 *
12137 * [78] extParsedEnt ::= TextDecl? content
12138 *
12139 * This correspond to a "Well Balanced" chunk
12140 *
12141 * Returns the resulting document tree
12142 */
12143
12144xmlDocPtr
12145xmlParseEntity(const char *filename) {
12146 return(xmlSAXParseEntity(NULL, filename));
12147}
Daniel Veillard81273902003-09-30 00:43:48 +000012148#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012149
12150/**
12151 * xmlCreateEntityParserCtxt:
12152 * @URL: the entity URL
12153 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012154 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012155 *
12156 * Create a parser context for an external entity
12157 * Automatic support for ZLIB/Compress compressed document is provided
12158 * by default if found at compile-time.
12159 *
12160 * Returns the new parser context or NULL
12161 */
12162xmlParserCtxtPtr
12163xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12164 const xmlChar *base) {
12165 xmlParserCtxtPtr ctxt;
12166 xmlParserInputPtr inputStream;
12167 char *directory = NULL;
12168 xmlChar *uri;
12169
12170 ctxt = xmlNewParserCtxt();
12171 if (ctxt == NULL) {
12172 return(NULL);
12173 }
12174
12175 uri = xmlBuildURI(URL, base);
12176
12177 if (uri == NULL) {
12178 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12179 if (inputStream == NULL) {
12180 xmlFreeParserCtxt(ctxt);
12181 return(NULL);
12182 }
12183
12184 inputPush(ctxt, inputStream);
12185
12186 if ((ctxt->directory == NULL) && (directory == NULL))
12187 directory = xmlParserGetDirectory((char *)URL);
12188 if ((ctxt->directory == NULL) && (directory != NULL))
12189 ctxt->directory = directory;
12190 } else {
12191 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12192 if (inputStream == NULL) {
12193 xmlFree(uri);
12194 xmlFreeParserCtxt(ctxt);
12195 return(NULL);
12196 }
12197
12198 inputPush(ctxt, inputStream);
12199
12200 if ((ctxt->directory == NULL) && (directory == NULL))
12201 directory = xmlParserGetDirectory((char *)uri);
12202 if ((ctxt->directory == NULL) && (directory != NULL))
12203 ctxt->directory = directory;
12204 xmlFree(uri);
12205 }
Owen Taylor3473f882001-02-23 17:55:21 +000012206 return(ctxt);
12207}
12208
12209/************************************************************************
12210 * *
12211 * Front ends when parsing from a file *
12212 * *
12213 ************************************************************************/
12214
12215/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012216 * xmlCreateURLParserCtxt:
12217 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012218 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012219 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012220 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012221 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012222 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012223 *
12224 * Returns the new parser context or NULL
12225 */
12226xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012227xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012228{
12229 xmlParserCtxtPtr ctxt;
12230 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012231 char *directory = NULL;
12232
Owen Taylor3473f882001-02-23 17:55:21 +000012233 ctxt = xmlNewParserCtxt();
12234 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012235 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012236 return(NULL);
12237 }
12238
Daniel Veillarddf292f72005-01-16 19:00:15 +000012239 if (options)
12240 xmlCtxtUseOptions(ctxt, options);
12241 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012242
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012243 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012244 if (inputStream == NULL) {
12245 xmlFreeParserCtxt(ctxt);
12246 return(NULL);
12247 }
12248
Owen Taylor3473f882001-02-23 17:55:21 +000012249 inputPush(ctxt, inputStream);
12250 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012251 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012252 if ((ctxt->directory == NULL) && (directory != NULL))
12253 ctxt->directory = directory;
12254
12255 return(ctxt);
12256}
12257
Daniel Veillard61b93382003-11-03 14:28:31 +000012258/**
12259 * xmlCreateFileParserCtxt:
12260 * @filename: the filename
12261 *
12262 * Create a parser context for a file content.
12263 * Automatic support for ZLIB/Compress compressed document is provided
12264 * by default if found at compile-time.
12265 *
12266 * Returns the new parser context or NULL
12267 */
12268xmlParserCtxtPtr
12269xmlCreateFileParserCtxt(const char *filename)
12270{
12271 return(xmlCreateURLParserCtxt(filename, 0));
12272}
12273
Daniel Veillard81273902003-09-30 00:43:48 +000012274#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012275/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012276 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012277 * @sax: the SAX handler block
12278 * @filename: the filename
12279 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12280 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012281 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012282 *
12283 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12284 * compressed document is provided by default if found at compile-time.
12285 * It use the given SAX function block to handle the parsing callback.
12286 * If sax is NULL, fallback to the default DOM tree building routines.
12287 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012288 * User data (void *) is stored within the parser context in the
12289 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012290 *
Owen Taylor3473f882001-02-23 17:55:21 +000012291 * Returns the resulting document tree
12292 */
12293
12294xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012295xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12296 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012297 xmlDocPtr ret;
12298 xmlParserCtxtPtr ctxt;
12299 char *directory = NULL;
12300
Daniel Veillard635ef722001-10-29 11:48:19 +000012301 xmlInitParser();
12302
Owen Taylor3473f882001-02-23 17:55:21 +000012303 ctxt = xmlCreateFileParserCtxt(filename);
12304 if (ctxt == NULL) {
12305 return(NULL);
12306 }
12307 if (sax != NULL) {
12308 if (ctxt->sax != NULL)
12309 xmlFree(ctxt->sax);
12310 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012311 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012312 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012313 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012314 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012315 }
Owen Taylor3473f882001-02-23 17:55:21 +000012316
12317 if ((ctxt->directory == NULL) && (directory == NULL))
12318 directory = xmlParserGetDirectory(filename);
12319 if ((ctxt->directory == NULL) && (directory != NULL))
12320 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12321
Daniel Veillarddad3f682002-11-17 16:47:27 +000012322 ctxt->recovery = recovery;
12323
Owen Taylor3473f882001-02-23 17:55:21 +000012324 xmlParseDocument(ctxt);
12325
William M. Brackc07329e2003-09-08 01:57:30 +000012326 if ((ctxt->wellFormed) || recovery) {
12327 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012328 if (ret != NULL) {
12329 if (ctxt->input->buf->compressed > 0)
12330 ret->compression = 9;
12331 else
12332 ret->compression = ctxt->input->buf->compressed;
12333 }
William M. Brackc07329e2003-09-08 01:57:30 +000012334 }
Owen Taylor3473f882001-02-23 17:55:21 +000012335 else {
12336 ret = NULL;
12337 xmlFreeDoc(ctxt->myDoc);
12338 ctxt->myDoc = NULL;
12339 }
12340 if (sax != NULL)
12341 ctxt->sax = NULL;
12342 xmlFreeParserCtxt(ctxt);
12343
12344 return(ret);
12345}
12346
12347/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012348 * xmlSAXParseFile:
12349 * @sax: the SAX handler block
12350 * @filename: the filename
12351 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12352 * documents
12353 *
12354 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12355 * compressed document is provided by default if found at compile-time.
12356 * It use the given SAX function block to handle the parsing callback.
12357 * If sax is NULL, fallback to the default DOM tree building routines.
12358 *
12359 * Returns the resulting document tree
12360 */
12361
12362xmlDocPtr
12363xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12364 int recovery) {
12365 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12366}
12367
12368/**
Owen Taylor3473f882001-02-23 17:55:21 +000012369 * xmlRecoverDoc:
12370 * @cur: a pointer to an array of xmlChar
12371 *
12372 * parse an XML in-memory document and build a tree.
12373 * In the case the document is not Well Formed, a tree is built anyway
12374 *
12375 * Returns the resulting document tree
12376 */
12377
12378xmlDocPtr
12379xmlRecoverDoc(xmlChar *cur) {
12380 return(xmlSAXParseDoc(NULL, cur, 1));
12381}
12382
12383/**
12384 * xmlParseFile:
12385 * @filename: the filename
12386 *
12387 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12388 * compressed document is provided by default if found at compile-time.
12389 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012390 * Returns the resulting document tree if the file was wellformed,
12391 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012392 */
12393
12394xmlDocPtr
12395xmlParseFile(const char *filename) {
12396 return(xmlSAXParseFile(NULL, filename, 0));
12397}
12398
12399/**
12400 * xmlRecoverFile:
12401 * @filename: the filename
12402 *
12403 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12404 * compressed document is provided by default if found at compile-time.
12405 * In the case the document is not Well Formed, a tree is built anyway
12406 *
12407 * Returns the resulting document tree
12408 */
12409
12410xmlDocPtr
12411xmlRecoverFile(const char *filename) {
12412 return(xmlSAXParseFile(NULL, filename, 1));
12413}
12414
12415
12416/**
12417 * xmlSetupParserForBuffer:
12418 * @ctxt: an XML parser context
12419 * @buffer: a xmlChar * buffer
12420 * @filename: a file name
12421 *
12422 * Setup the parser context to parse a new buffer; Clears any prior
12423 * contents from the parser context. The buffer parameter must not be
12424 * NULL, but the filename parameter can be
12425 */
12426void
12427xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12428 const char* filename)
12429{
12430 xmlParserInputPtr input;
12431
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012432 if ((ctxt == NULL) || (buffer == NULL))
12433 return;
12434
Owen Taylor3473f882001-02-23 17:55:21 +000012435 input = xmlNewInputStream(ctxt);
12436 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012437 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012438 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012439 return;
12440 }
12441
12442 xmlClearParserCtxt(ctxt);
12443 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012444 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012445 input->base = buffer;
12446 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012447 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012448 inputPush(ctxt, input);
12449}
12450
12451/**
12452 * xmlSAXUserParseFile:
12453 * @sax: a SAX handler
12454 * @user_data: The user data returned on SAX callbacks
12455 * @filename: a file name
12456 *
12457 * parse an XML file and call the given SAX handler routines.
12458 * Automatic support for ZLIB/Compress compressed document is provided
12459 *
12460 * Returns 0 in case of success or a error number otherwise
12461 */
12462int
12463xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12464 const char *filename) {
12465 int ret = 0;
12466 xmlParserCtxtPtr ctxt;
12467
12468 ctxt = xmlCreateFileParserCtxt(filename);
12469 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012470#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012471 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012472#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012473 xmlFree(ctxt->sax);
12474 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012475 xmlDetectSAX2(ctxt);
12476
Owen Taylor3473f882001-02-23 17:55:21 +000012477 if (user_data != NULL)
12478 ctxt->userData = user_data;
12479
12480 xmlParseDocument(ctxt);
12481
12482 if (ctxt->wellFormed)
12483 ret = 0;
12484 else {
12485 if (ctxt->errNo != 0)
12486 ret = ctxt->errNo;
12487 else
12488 ret = -1;
12489 }
12490 if (sax != NULL)
12491 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012492 if (ctxt->myDoc != NULL) {
12493 xmlFreeDoc(ctxt->myDoc);
12494 ctxt->myDoc = NULL;
12495 }
Owen Taylor3473f882001-02-23 17:55:21 +000012496 xmlFreeParserCtxt(ctxt);
12497
12498 return ret;
12499}
Daniel Veillard81273902003-09-30 00:43:48 +000012500#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012501
12502/************************************************************************
12503 * *
12504 * Front ends when parsing from memory *
12505 * *
12506 ************************************************************************/
12507
12508/**
12509 * xmlCreateMemoryParserCtxt:
12510 * @buffer: a pointer to a char array
12511 * @size: the size of the array
12512 *
12513 * Create a parser context for an XML in-memory document.
12514 *
12515 * Returns the new parser context or NULL
12516 */
12517xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012518xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012519 xmlParserCtxtPtr ctxt;
12520 xmlParserInputPtr input;
12521 xmlParserInputBufferPtr buf;
12522
12523 if (buffer == NULL)
12524 return(NULL);
12525 if (size <= 0)
12526 return(NULL);
12527
12528 ctxt = xmlNewParserCtxt();
12529 if (ctxt == NULL)
12530 return(NULL);
12531
Daniel Veillard53350552003-09-18 13:35:51 +000012532 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012533 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012534 if (buf == NULL) {
12535 xmlFreeParserCtxt(ctxt);
12536 return(NULL);
12537 }
Owen Taylor3473f882001-02-23 17:55:21 +000012538
12539 input = xmlNewInputStream(ctxt);
12540 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012541 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012542 xmlFreeParserCtxt(ctxt);
12543 return(NULL);
12544 }
12545
12546 input->filename = NULL;
12547 input->buf = buf;
12548 input->base = input->buf->buffer->content;
12549 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012550 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012551
12552 inputPush(ctxt, input);
12553 return(ctxt);
12554}
12555
Daniel Veillard81273902003-09-30 00:43:48 +000012556#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012557/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012558 * xmlSAXParseMemoryWithData:
12559 * @sax: the SAX handler block
12560 * @buffer: an pointer to a char array
12561 * @size: the size of the array
12562 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12563 * documents
12564 * @data: the userdata
12565 *
12566 * parse an XML in-memory block and use the given SAX function block
12567 * to handle the parsing callback. If sax is NULL, fallback to the default
12568 * DOM tree building routines.
12569 *
12570 * User data (void *) is stored within the parser context in the
12571 * context's _private member, so it is available nearly everywhere in libxml
12572 *
12573 * Returns the resulting document tree
12574 */
12575
12576xmlDocPtr
12577xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12578 int size, int recovery, void *data) {
12579 xmlDocPtr ret;
12580 xmlParserCtxtPtr ctxt;
12581
12582 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12583 if (ctxt == NULL) return(NULL);
12584 if (sax != NULL) {
12585 if (ctxt->sax != NULL)
12586 xmlFree(ctxt->sax);
12587 ctxt->sax = sax;
12588 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012589 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012590 if (data!=NULL) {
12591 ctxt->_private=data;
12592 }
12593
Daniel Veillardadba5f12003-04-04 16:09:01 +000012594 ctxt->recovery = recovery;
12595
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012596 xmlParseDocument(ctxt);
12597
12598 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12599 else {
12600 ret = NULL;
12601 xmlFreeDoc(ctxt->myDoc);
12602 ctxt->myDoc = NULL;
12603 }
12604 if (sax != NULL)
12605 ctxt->sax = NULL;
12606 xmlFreeParserCtxt(ctxt);
12607
12608 return(ret);
12609}
12610
12611/**
Owen Taylor3473f882001-02-23 17:55:21 +000012612 * xmlSAXParseMemory:
12613 * @sax: the SAX handler block
12614 * @buffer: an pointer to a char array
12615 * @size: the size of the array
12616 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12617 * documents
12618 *
12619 * parse an XML in-memory block and use the given SAX function block
12620 * to handle the parsing callback. If sax is NULL, fallback to the default
12621 * DOM tree building routines.
12622 *
12623 * Returns the resulting document tree
12624 */
12625xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012626xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12627 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012628 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012629}
12630
12631/**
12632 * xmlParseMemory:
12633 * @buffer: an pointer to a char array
12634 * @size: the size of the array
12635 *
12636 * parse an XML in-memory block and build a tree.
12637 *
12638 * Returns the resulting document tree
12639 */
12640
Daniel Veillard50822cb2001-07-26 20:05:51 +000012641xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012642 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12643}
12644
12645/**
12646 * xmlRecoverMemory:
12647 * @buffer: an pointer to a char array
12648 * @size: the size of the array
12649 *
12650 * parse an XML in-memory block and build a tree.
12651 * In the case the document is not Well Formed, a tree is built anyway
12652 *
12653 * Returns the resulting document tree
12654 */
12655
Daniel Veillard50822cb2001-07-26 20:05:51 +000012656xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012657 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12658}
12659
12660/**
12661 * xmlSAXUserParseMemory:
12662 * @sax: a SAX handler
12663 * @user_data: The user data returned on SAX callbacks
12664 * @buffer: an in-memory XML document input
12665 * @size: the length of the XML document in bytes
12666 *
12667 * A better SAX parsing routine.
12668 * parse an XML in-memory buffer and call the given SAX handler routines.
12669 *
12670 * Returns 0 in case of success or a error number otherwise
12671 */
12672int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012673 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012674 int ret = 0;
12675 xmlParserCtxtPtr ctxt;
12676 xmlSAXHandlerPtr oldsax = NULL;
12677
Daniel Veillard9e923512002-08-14 08:48:52 +000012678 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012679 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12680 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012681 oldsax = ctxt->sax;
12682 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012683 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012684 if (user_data != NULL)
12685 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012686
12687 xmlParseDocument(ctxt);
12688
12689 if (ctxt->wellFormed)
12690 ret = 0;
12691 else {
12692 if (ctxt->errNo != 0)
12693 ret = ctxt->errNo;
12694 else
12695 ret = -1;
12696 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012697 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012698 if (ctxt->myDoc != NULL) {
12699 xmlFreeDoc(ctxt->myDoc);
12700 ctxt->myDoc = NULL;
12701 }
Owen Taylor3473f882001-02-23 17:55:21 +000012702 xmlFreeParserCtxt(ctxt);
12703
12704 return ret;
12705}
Daniel Veillard81273902003-09-30 00:43:48 +000012706#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012707
12708/**
12709 * xmlCreateDocParserCtxt:
12710 * @cur: a pointer to an array of xmlChar
12711 *
12712 * Creates a parser context for an XML in-memory document.
12713 *
12714 * Returns the new parser context or NULL
12715 */
12716xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012717xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012718 int len;
12719
12720 if (cur == NULL)
12721 return(NULL);
12722 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012723 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012724}
12725
Daniel Veillard81273902003-09-30 00:43:48 +000012726#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012727/**
12728 * xmlSAXParseDoc:
12729 * @sax: the SAX handler block
12730 * @cur: a pointer to an array of xmlChar
12731 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12732 * documents
12733 *
12734 * parse an XML in-memory document and build a tree.
12735 * It use the given SAX function block to handle the parsing callback.
12736 * If sax is NULL, fallback to the default DOM tree building routines.
12737 *
12738 * Returns the resulting document tree
12739 */
12740
12741xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012742xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012743 xmlDocPtr ret;
12744 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012745 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012746
Daniel Veillard38936062004-11-04 17:45:11 +000012747 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012748
12749
12750 ctxt = xmlCreateDocParserCtxt(cur);
12751 if (ctxt == NULL) return(NULL);
12752 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012753 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012754 ctxt->sax = sax;
12755 ctxt->userData = NULL;
12756 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012757 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012758
12759 xmlParseDocument(ctxt);
12760 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12761 else {
12762 ret = NULL;
12763 xmlFreeDoc(ctxt->myDoc);
12764 ctxt->myDoc = NULL;
12765 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012766 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012767 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012768 xmlFreeParserCtxt(ctxt);
12769
12770 return(ret);
12771}
12772
12773/**
12774 * xmlParseDoc:
12775 * @cur: a pointer to an array of xmlChar
12776 *
12777 * parse an XML in-memory document and build a tree.
12778 *
12779 * Returns the resulting document tree
12780 */
12781
12782xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012783xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012784 return(xmlSAXParseDoc(NULL, cur, 0));
12785}
Daniel Veillard81273902003-09-30 00:43:48 +000012786#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012787
Daniel Veillard81273902003-09-30 00:43:48 +000012788#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012789/************************************************************************
12790 * *
12791 * Specific function to keep track of entities references *
12792 * and used by the XSLT debugger *
12793 * *
12794 ************************************************************************/
12795
12796static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12797
12798/**
12799 * xmlAddEntityReference:
12800 * @ent : A valid entity
12801 * @firstNode : A valid first node for children of entity
12802 * @lastNode : A valid last node of children entity
12803 *
12804 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12805 */
12806static void
12807xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12808 xmlNodePtr lastNode)
12809{
12810 if (xmlEntityRefFunc != NULL) {
12811 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12812 }
12813}
12814
12815
12816/**
12817 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012818 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012819 *
12820 * Set the function to call call back when a xml reference has been made
12821 */
12822void
12823xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12824{
12825 xmlEntityRefFunc = func;
12826}
Daniel Veillard81273902003-09-30 00:43:48 +000012827#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012828
12829/************************************************************************
12830 * *
12831 * Miscellaneous *
12832 * *
12833 ************************************************************************/
12834
12835#ifdef LIBXML_XPATH_ENABLED
12836#include <libxml/xpath.h>
12837#endif
12838
Daniel Veillardffa3c742005-07-21 13:24:09 +000012839extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012840static int xmlParserInitialized = 0;
12841
12842/**
12843 * xmlInitParser:
12844 *
12845 * Initialization function for the XML parser.
12846 * This is not reentrant. Call once before processing in case of
12847 * use in multithreaded programs.
12848 */
12849
12850void
12851xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012852 if (xmlParserInitialized != 0)
12853 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012854
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012855#ifdef LIBXML_THREAD_ENABLED
12856 __xmlGlobalInitMutexLock();
12857 if (xmlParserInitialized == 0) {
12858#endif
12859 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12860 (xmlGenericError == NULL))
12861 initGenericErrorDefaultFunc(NULL);
12862 xmlInitGlobals();
12863 xmlInitThreads();
12864 xmlInitMemory();
12865 xmlInitCharEncodingHandlers();
12866 xmlDefaultSAXHandlerInit();
12867 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012868#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012869 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012870#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012871#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012872 htmlInitAutoClose();
12873 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012874#endif
12875#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012876 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012877#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012878 xmlParserInitialized = 1;
12879#ifdef LIBXML_THREAD_ENABLED
12880 }
12881 __xmlGlobalInitMutexUnlock();
12882#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012883}
12884
12885/**
12886 * xmlCleanupParser:
12887 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012888 * Cleanup function for the XML library. It tries to reclaim all
12889 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012890 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012891 * function should not prevent reusing the library but one should
12892 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012893 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012894 */
12895
12896void
12897xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012898 if (!xmlParserInitialized)
12899 return;
12900
Owen Taylor3473f882001-02-23 17:55:21 +000012901 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012902#ifdef LIBXML_CATALOG_ENABLED
12903 xmlCatalogCleanup();
12904#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012905 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012906 xmlCleanupInputCallbacks();
12907#ifdef LIBXML_OUTPUT_ENABLED
12908 xmlCleanupOutputCallbacks();
12909#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012910#ifdef LIBXML_SCHEMAS_ENABLED
12911 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012912 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012913#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012914 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012915 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012916 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012917 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012918 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012919}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012920
12921/************************************************************************
12922 * *
12923 * New set (2.6.0) of simpler and more flexible APIs *
12924 * *
12925 ************************************************************************/
12926
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed.
 *
 * NOTE: the expansion is a bare "if" statement that already ends with
 * a semicolon, so do not use it as the body of an unbraced if/else.
 */
#define DICT_FREE(str)                                                  \
    if (((str) != NULL) &&                                              \
        ((dict == NULL) ||                                              \
         (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))             \
        xmlFree((char *)(str));
12938
12939/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012940 * xmlCtxtReset:
12941 * @ctxt: an XML parser context
12942 *
12943 * Reset a parser context
12944 */
12945void
12946xmlCtxtReset(xmlParserCtxtPtr ctxt)
12947{
12948 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012949 xmlDictPtr dict;
12950
12951 if (ctxt == NULL)
12952 return;
12953
12954 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012955
12956 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12957 xmlFreeInputStream(input);
12958 }
12959 ctxt->inputNr = 0;
12960 ctxt->input = NULL;
12961
12962 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000012963 if (ctxt->spaceTab != NULL) {
12964 ctxt->spaceTab[0] = -1;
12965 ctxt->space = &ctxt->spaceTab[0];
12966 } else {
12967 ctxt->space = NULL;
12968 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012969
12970
12971 ctxt->nodeNr = 0;
12972 ctxt->node = NULL;
12973
12974 ctxt->nameNr = 0;
12975 ctxt->name = NULL;
12976
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012977 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012978 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012979 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012980 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012981 DICT_FREE(ctxt->directory);
12982 ctxt->directory = NULL;
12983 DICT_FREE(ctxt->extSubURI);
12984 ctxt->extSubURI = NULL;
12985 DICT_FREE(ctxt->extSubSystem);
12986 ctxt->extSubSystem = NULL;
12987 if (ctxt->myDoc != NULL)
12988 xmlFreeDoc(ctxt->myDoc);
12989 ctxt->myDoc = NULL;
12990
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012991 ctxt->standalone = -1;
12992 ctxt->hasExternalSubset = 0;
12993 ctxt->hasPErefs = 0;
12994 ctxt->html = 0;
12995 ctxt->external = 0;
12996 ctxt->instate = XML_PARSER_START;
12997 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012998
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012999 ctxt->wellFormed = 1;
13000 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013001 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013002 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013003#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013004 ctxt->vctxt.userData = ctxt;
13005 ctxt->vctxt.error = xmlParserValidityError;
13006 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013007#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008 ctxt->record_info = 0;
13009 ctxt->nbChars = 0;
13010 ctxt->checkIndex = 0;
13011 ctxt->inSubset = 0;
13012 ctxt->errNo = XML_ERR_OK;
13013 ctxt->depth = 0;
13014 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13015 ctxt->catalogs = NULL;
13016 xmlInitNodeInfoSeq(&ctxt->node_seq);
13017
13018 if (ctxt->attsDefault != NULL) {
13019 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13020 ctxt->attsDefault = NULL;
13021 }
13022 if (ctxt->attsSpecial != NULL) {
13023 xmlHashFree(ctxt->attsSpecial, NULL);
13024 ctxt->attsSpecial = NULL;
13025 }
13026
Daniel Veillard4432df22003-09-28 18:58:27 +000013027#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013028 if (ctxt->catalogs != NULL)
13029 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013030#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013031 if (ctxt->lastError.code != XML_ERR_OK)
13032 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013033}
13034
13035/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013036 * xmlCtxtResetPush:
13037 * @ctxt: an XML parser context
13038 * @chunk: a pointer to an array of chars
13039 * @size: number of chars in the array
13040 * @filename: an optional file name or URI
13041 * @encoding: the document encoding, or NULL
13042 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013043 * Reset a push parser context
13044 *
13045 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013046 */
13047int
13048xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13049 int size, const char *filename, const char *encoding)
13050{
13051 xmlParserInputPtr inputStream;
13052 xmlParserInputBufferPtr buf;
13053 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13054
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013055 if (ctxt == NULL)
13056 return(1);
13057
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013058 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13059 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13060
13061 buf = xmlAllocParserInputBuffer(enc);
13062 if (buf == NULL)
13063 return(1);
13064
13065 if (ctxt == NULL) {
13066 xmlFreeParserInputBuffer(buf);
13067 return(1);
13068 }
13069
13070 xmlCtxtReset(ctxt);
13071
13072 if (ctxt->pushTab == NULL) {
13073 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13074 sizeof(xmlChar *));
13075 if (ctxt->pushTab == NULL) {
13076 xmlErrMemory(ctxt, NULL);
13077 xmlFreeParserInputBuffer(buf);
13078 return(1);
13079 }
13080 }
13081
13082 if (filename == NULL) {
13083 ctxt->directory = NULL;
13084 } else {
13085 ctxt->directory = xmlParserGetDirectory(filename);
13086 }
13087
13088 inputStream = xmlNewInputStream(ctxt);
13089 if (inputStream == NULL) {
13090 xmlFreeParserInputBuffer(buf);
13091 return(1);
13092 }
13093
13094 if (filename == NULL)
13095 inputStream->filename = NULL;
13096 else
13097 inputStream->filename = (char *)
13098 xmlCanonicPath((const xmlChar *) filename);
13099 inputStream->buf = buf;
13100 inputStream->base = inputStream->buf->buffer->content;
13101 inputStream->cur = inputStream->buf->buffer->content;
13102 inputStream->end =
13103 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13104
13105 inputPush(ctxt, inputStream);
13106
13107 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13108 (ctxt->input->buf != NULL)) {
13109 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13110 int cur = ctxt->input->cur - ctxt->input->base;
13111
13112 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13113
13114 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13115 ctxt->input->cur = ctxt->input->base + cur;
13116 ctxt->input->end =
13117 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13118 use];
13119#ifdef DEBUG_PUSH
13120 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13121#endif
13122 }
13123
13124 if (encoding != NULL) {
13125 xmlCharEncodingHandlerPtr hdlr;
13126
13127 hdlr = xmlFindCharEncodingHandler(encoding);
13128 if (hdlr != NULL) {
13129 xmlSwitchToEncoding(ctxt, hdlr);
13130 } else {
13131 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13132 "Unsupported encoding %s\n", BAD_CAST encoding);
13133 }
13134 } else if (enc != XML_CHAR_ENCODING_NONE) {
13135 xmlSwitchEncoding(ctxt, enc);
13136 }
13137
13138 return(0);
13139}
13140
13141/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013142 * xmlCtxtUseOptions:
13143 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013144 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013145 *
13146 * Applies the options to the parser context
13147 *
13148 * Returns 0 in case of success, the set of unknown or unimplemented options
13149 * in case of error.
13150 */
13151int
13152xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13153{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013154 if (ctxt == NULL)
13155 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013156 if (options & XML_PARSE_RECOVER) {
13157 ctxt->recovery = 1;
13158 options -= XML_PARSE_RECOVER;
13159 } else
13160 ctxt->recovery = 0;
13161 if (options & XML_PARSE_DTDLOAD) {
13162 ctxt->loadsubset = XML_DETECT_IDS;
13163 options -= XML_PARSE_DTDLOAD;
13164 } else
13165 ctxt->loadsubset = 0;
13166 if (options & XML_PARSE_DTDATTR) {
13167 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13168 options -= XML_PARSE_DTDATTR;
13169 }
13170 if (options & XML_PARSE_NOENT) {
13171 ctxt->replaceEntities = 1;
13172 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13173 options -= XML_PARSE_NOENT;
13174 } else
13175 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013176 if (options & XML_PARSE_PEDANTIC) {
13177 ctxt->pedantic = 1;
13178 options -= XML_PARSE_PEDANTIC;
13179 } else
13180 ctxt->pedantic = 0;
13181 if (options & XML_PARSE_NOBLANKS) {
13182 ctxt->keepBlanks = 0;
13183 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13184 options -= XML_PARSE_NOBLANKS;
13185 } else
13186 ctxt->keepBlanks = 1;
13187 if (options & XML_PARSE_DTDVALID) {
13188 ctxt->validate = 1;
13189 if (options & XML_PARSE_NOWARNING)
13190 ctxt->vctxt.warning = NULL;
13191 if (options & XML_PARSE_NOERROR)
13192 ctxt->vctxt.error = NULL;
13193 options -= XML_PARSE_DTDVALID;
13194 } else
13195 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013196 if (options & XML_PARSE_NOWARNING) {
13197 ctxt->sax->warning = NULL;
13198 options -= XML_PARSE_NOWARNING;
13199 }
13200 if (options & XML_PARSE_NOERROR) {
13201 ctxt->sax->error = NULL;
13202 ctxt->sax->fatalError = NULL;
13203 options -= XML_PARSE_NOERROR;
13204 }
Daniel Veillard81273902003-09-30 00:43:48 +000013205#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013206 if (options & XML_PARSE_SAX1) {
13207 ctxt->sax->startElement = xmlSAX2StartElement;
13208 ctxt->sax->endElement = xmlSAX2EndElement;
13209 ctxt->sax->startElementNs = NULL;
13210 ctxt->sax->endElementNs = NULL;
13211 ctxt->sax->initialized = 1;
13212 options -= XML_PARSE_SAX1;
13213 }
Daniel Veillard81273902003-09-30 00:43:48 +000013214#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013215 if (options & XML_PARSE_NODICT) {
13216 ctxt->dictNames = 0;
13217 options -= XML_PARSE_NODICT;
13218 } else {
13219 ctxt->dictNames = 1;
13220 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013221 if (options & XML_PARSE_NOCDATA) {
13222 ctxt->sax->cdataBlock = NULL;
13223 options -= XML_PARSE_NOCDATA;
13224 }
13225 if (options & XML_PARSE_NSCLEAN) {
13226 ctxt->options |= XML_PARSE_NSCLEAN;
13227 options -= XML_PARSE_NSCLEAN;
13228 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013229 if (options & XML_PARSE_NONET) {
13230 ctxt->options |= XML_PARSE_NONET;
13231 options -= XML_PARSE_NONET;
13232 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013233 if (options & XML_PARSE_COMPACT) {
13234 ctxt->options |= XML_PARSE_COMPACT;
13235 options -= XML_PARSE_COMPACT;
13236 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013237 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013238 return (options);
13239}
13240
13241/**
13242 * xmlDoRead:
13243 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013244 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013245 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013246 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013247 * @reuse: keep the context for reuse
13248 *
13249 * Common front-end for the xmlRead functions
13250 *
13251 * Returns the resulting document tree or NULL
13252 */
13253static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013254xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13255 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013256{
13257 xmlDocPtr ret;
13258
13259 xmlCtxtUseOptions(ctxt, options);
13260 if (encoding != NULL) {
13261 xmlCharEncodingHandlerPtr hdlr;
13262
13263 hdlr = xmlFindCharEncodingHandler(encoding);
13264 if (hdlr != NULL)
13265 xmlSwitchToEncoding(ctxt, hdlr);
13266 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013267 if ((URL != NULL) && (ctxt->input != NULL) &&
13268 (ctxt->input->filename == NULL))
13269 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013270 xmlParseDocument(ctxt);
13271 if ((ctxt->wellFormed) || ctxt->recovery)
13272 ret = ctxt->myDoc;
13273 else {
13274 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013275 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013276 xmlFreeDoc(ctxt->myDoc);
13277 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013278 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013279 ctxt->myDoc = NULL;
13280 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013281 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013282 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013283
13284 return (ret);
13285}
13286
13287/**
13288 * xmlReadDoc:
13289 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013290 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013291 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013292 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013293 *
13294 * parse an XML in-memory document and build a tree.
13295 *
13296 * Returns the resulting document tree
13297 */
13298xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013299xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013300{
13301 xmlParserCtxtPtr ctxt;
13302
13303 if (cur == NULL)
13304 return (NULL);
13305
13306 ctxt = xmlCreateDocParserCtxt(cur);
13307 if (ctxt == NULL)
13308 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013309 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013310}
13311
13312/**
13313 * xmlReadFile:
13314 * @filename: a file or URL
13315 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013316 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013317 *
13318 * parse an XML file from the filesystem or the network.
13319 *
13320 * Returns the resulting document tree
13321 */
13322xmlDocPtr
13323xmlReadFile(const char *filename, const char *encoding, int options)
13324{
13325 xmlParserCtxtPtr ctxt;
13326
Daniel Veillard61b93382003-11-03 14:28:31 +000013327 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013328 if (ctxt == NULL)
13329 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013330 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013331}
13332
13333/**
13334 * xmlReadMemory:
13335 * @buffer: a pointer to a char array
13336 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013337 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013338 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013339 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013340 *
13341 * parse an XML in-memory document and build a tree.
13342 *
13343 * Returns the resulting document tree
13344 */
13345xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013346xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013347{
13348 xmlParserCtxtPtr ctxt;
13349
13350 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13351 if (ctxt == NULL)
13352 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013353 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013354}
13355
13356/**
13357 * xmlReadFd:
13358 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013359 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013360 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013361 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013362 *
13363 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013364 * NOTE that the file descriptor will not be closed when the
13365 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013366 *
13367 * Returns the resulting document tree
13368 */
13369xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013370xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013371{
13372 xmlParserCtxtPtr ctxt;
13373 xmlParserInputBufferPtr input;
13374 xmlParserInputPtr stream;
13375
13376 if (fd < 0)
13377 return (NULL);
13378
13379 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13380 if (input == NULL)
13381 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013382 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013383 ctxt = xmlNewParserCtxt();
13384 if (ctxt == NULL) {
13385 xmlFreeParserInputBuffer(input);
13386 return (NULL);
13387 }
13388 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13389 if (stream == NULL) {
13390 xmlFreeParserInputBuffer(input);
13391 xmlFreeParserCtxt(ctxt);
13392 return (NULL);
13393 }
13394 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013395 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013396}
13397
13398/**
13399 * xmlReadIO:
13400 * @ioread: an I/O read function
13401 * @ioclose: an I/O close function
13402 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013403 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013404 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013405 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013406 *
13407 * parse an XML document from I/O functions and source and build a tree.
13408 *
13409 * Returns the resulting document tree
13410 */
13411xmlDocPtr
13412xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013413 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013414{
13415 xmlParserCtxtPtr ctxt;
13416 xmlParserInputBufferPtr input;
13417 xmlParserInputPtr stream;
13418
13419 if (ioread == NULL)
13420 return (NULL);
13421
13422 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13423 XML_CHAR_ENCODING_NONE);
13424 if (input == NULL)
13425 return (NULL);
13426 ctxt = xmlNewParserCtxt();
13427 if (ctxt == NULL) {
13428 xmlFreeParserInputBuffer(input);
13429 return (NULL);
13430 }
13431 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13432 if (stream == NULL) {
13433 xmlFreeParserInputBuffer(input);
13434 xmlFreeParserCtxt(ctxt);
13435 return (NULL);
13436 }
13437 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013438 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013439}
13440
13441/**
13442 * xmlCtxtReadDoc:
13443 * @ctxt: an XML parser context
13444 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013445 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013446 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013447 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013448 *
13449 * parse an XML in-memory document and build a tree.
13450 * This reuses the existing @ctxt parser context
13451 *
13452 * Returns the resulting document tree
13453 */
13454xmlDocPtr
13455xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013456 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013457{
13458 xmlParserInputPtr stream;
13459
13460 if (cur == NULL)
13461 return (NULL);
13462 if (ctxt == NULL)
13463 return (NULL);
13464
13465 xmlCtxtReset(ctxt);
13466
13467 stream = xmlNewStringInputStream(ctxt, cur);
13468 if (stream == NULL) {
13469 return (NULL);
13470 }
13471 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013472 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013473}
13474
13475/**
13476 * xmlCtxtReadFile:
13477 * @ctxt: an XML parser context
13478 * @filename: a file or URL
13479 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013480 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013481 *
13482 * parse an XML file from the filesystem or the network.
13483 * This reuses the existing @ctxt parser context
13484 *
13485 * Returns the resulting document tree
13486 */
13487xmlDocPtr
13488xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13489 const char *encoding, int options)
13490{
13491 xmlParserInputPtr stream;
13492
13493 if (filename == NULL)
13494 return (NULL);
13495 if (ctxt == NULL)
13496 return (NULL);
13497
13498 xmlCtxtReset(ctxt);
13499
Daniel Veillard29614c72004-11-26 10:47:26 +000013500 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013501 if (stream == NULL) {
13502 return (NULL);
13503 }
13504 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013505 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013506}
13507
13508/**
13509 * xmlCtxtReadMemory:
13510 * @ctxt: an XML parser context
13511 * @buffer: a pointer to a char array
13512 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013513 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013514 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013515 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013516 *
13517 * parse an XML in-memory document and build a tree.
13518 * This reuses the existing @ctxt parser context
13519 *
13520 * Returns the resulting document tree
13521 */
13522xmlDocPtr
13523xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013524 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013525{
13526 xmlParserInputBufferPtr input;
13527 xmlParserInputPtr stream;
13528
13529 if (ctxt == NULL)
13530 return (NULL);
13531 if (buffer == NULL)
13532 return (NULL);
13533
13534 xmlCtxtReset(ctxt);
13535
13536 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13537 if (input == NULL) {
13538 return(NULL);
13539 }
13540
13541 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13542 if (stream == NULL) {
13543 xmlFreeParserInputBuffer(input);
13544 return(NULL);
13545 }
13546
13547 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013548 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013549}
13550
13551/**
13552 * xmlCtxtReadFd:
13553 * @ctxt: an XML parser context
13554 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013555 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013556 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013557 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013558 *
13559 * parse an XML from a file descriptor and build a tree.
13560 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013561 * NOTE that the file descriptor will not be closed when the
13562 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013563 *
13564 * Returns the resulting document tree
13565 */
13566xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013567xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13568 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013569{
13570 xmlParserInputBufferPtr input;
13571 xmlParserInputPtr stream;
13572
13573 if (fd < 0)
13574 return (NULL);
13575 if (ctxt == NULL)
13576 return (NULL);
13577
13578 xmlCtxtReset(ctxt);
13579
13580
13581 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13582 if (input == NULL)
13583 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013584 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013585 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13586 if (stream == NULL) {
13587 xmlFreeParserInputBuffer(input);
13588 return (NULL);
13589 }
13590 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013591 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013592}
13593
13594/**
13595 * xmlCtxtReadIO:
13596 * @ctxt: an XML parser context
13597 * @ioread: an I/O read function
13598 * @ioclose: an I/O close function
13599 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013600 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013601 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013602 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013603 *
13604 * parse an XML document from I/O functions and source and build a tree.
13605 * This reuses the existing @ctxt parser context
13606 *
13607 * Returns the resulting document tree
13608 */
13609xmlDocPtr
13610xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13611 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013612 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013613 const char *encoding, int options)
13614{
13615 xmlParserInputBufferPtr input;
13616 xmlParserInputPtr stream;
13617
13618 if (ioread == NULL)
13619 return (NULL);
13620 if (ctxt == NULL)
13621 return (NULL);
13622
13623 xmlCtxtReset(ctxt);
13624
13625 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13626 XML_CHAR_ENCODING_NONE);
13627 if (input == NULL)
13628 return (NULL);
13629 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13630 if (stream == NULL) {
13631 xmlFreeParserInputBuffer(input);
13632 return (NULL);
13633 }
13634 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013635 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013636}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013637
13638#define bottom_parser
13639#include "elfgcchack.h"