blob: 6b5e86bfdc3f2fc0d2c13ed1f5e84ed6388d8051 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we are willing
 * to process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL-terminated and read-only: both the strings and the
 * array of pointers are const.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
 * Returns a non-zero value if the feature exists; zero (0) if it does not
 * exist or if an unknown feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1462 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
/*
 * Raw accessors for the current input. RAW and CUR both read the byte
 * under the input cursor, NXT(val) reads the byte val positions after it
 * and CUR_PTR is the cursor itself. They all expand to expressions on a
 * variable named ctxt, which must be in scope where they are used.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the n bytes starting at s are exactly
 * c1 ... cn. The comparison short-circuits on the first mismatch, so no
 * byte after a mismatching one is read. Every parameter is parenthesized
 * so that arbitrary expressions can be passed safely; s may be evaluated
 * up to n times and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): move the input cursor val xmlChars forward, adding val to the
 * character count and to the column as well (the line number is never
 * touched). If the cursor then sits on '%', xmlParserHandlePEReference()
 * is called; if it sits on a 0 byte and the input cannot be grown, the
 * current input is popped. val is expanded three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == 0) { \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) \
            xmlPopInput(ctxt); \
    } \
  } while (0)
1522
/*
 * SKIPL(val): like SKIP() but line aware. The cursor is advanced one
 * xmlChar at a time, val times; each '\n' stepped over increments the
 * line number and resets the column to 1, any other byte increments the
 * column. The character count grows by one per step. Afterwards, a '%'
 * under the cursor triggers xmlParserHandlePEReference() and a 0 byte
 * that cannot be refilled pops the current input.
 *
 * val is parenthesized in the loop bound so that expressions using
 * operators of lower precedence than '<' (e.g. a | b, c ? x : y) behave
 * as the caller intends. It is re-evaluated on every iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
	if (*(ctxt->input->cur) == '\n') { \
	    ctxt->input->line++; ctxt->input->col = 1; \
	} else ctxt->input->col++; \
	ctxt->nbChars++; \
	ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
	    xmlPopInput(ctxt); \
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* Skip blanks at the cursor; evaluates to the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Step over the current character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and the
 * character count, and try to grow the input when a 0 byte is reached.
 * No newline bookkeeping is done. The expansion is a brace block and the
 * macro takes no argument.
 */
#define NEXT1 { \
	ctxt->input->col++; \
	ctxt->input->cur++; \
	ctxt->nbChars++; \
	if (*ctxt->input->cur == 0) \
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' under the cursor increments the line and resets the column to 1,
 * anything else increments the column by one. A '%' found at the new
 * position triggers xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
	ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* Decode the character at the cursor / at s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and its return value is
 * added to i. The expansion is an if/else statement without a trailing
 * ';'; arguments are parenthesized and i is expanded more than once.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * growBuffer(buffer): double the capacity of a dynamically allocated
 * xmlChar buffer. The macro relies on two things being present at the
 * point of use: an lvalue named <buffer>_size holding the capacity, and
 * a label mem_error which is jumped to when the reallocation fails (the
 * buffer pointer is then left unchanged).
 */
#define growBuffer(buffer) { \
    xmlChar *grown; \
    buffer##_size = buffer##_size * 2; \
    grown = (xmlChar *) xmlRealloc(buffer, \
                                   buffer##_size * sizeof(xmlChar)); \
    if (grown == NULL) \
        goto mem_error; \
    buffer = grown; \
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002499 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002500 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002512 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002513 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002515 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002516 }
Owen Taylor3473f882001-02-23 17:55:21 +00002517 len = 0;
2518
Daniel Veillardbb284f42002-10-16 18:02:47 +00002519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002532 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002533 }
2534 }
2535 cur++;
2536
Owen Taylor3473f882001-02-23 17:55:21 +00002537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002550 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 xmlChar *tmp;
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002560 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 return(NULL);
2565 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002566 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
2584/************************************************************************
2585 * *
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2588 * *
2589 ************************************************************************/
2590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002593 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002594
Owen Taylor3473f882001-02-23 17:55:21 +00002595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002606 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002612xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002613 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002614 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002615 int count = 0;
2616
2617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002633 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002636 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002641 return(ret);
2642 }
2643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002670 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 }
William M. Brack76e95df2003-10-18 16:20:14 +00002672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 /* success */
2674 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002675 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002682 }
2683 return ret;
2684}
2685
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002686static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002713 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 NEXTL(l);
2715 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Daniel Veillard96688262005-08-23 18:14:12 +00002717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002734 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002736 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * is updated to the current location in the string.
2738 */
2739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002740static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002748 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002749 (c != ':')) {
2750 return(NULL);
2751 }
2752
William M. Brack871611b2003-10-18 04:53:14 +00002753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002776 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002785 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 return(NULL);
2790 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002791 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
William M. Brack871611b2003-10-18 04:53:14 +00002829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 xmlChar *tmp;
2866
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002869 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 return(NULL);
2874 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002875 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002922 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
William M. Brack871611b2003-10-18 04:53:14 +00002944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 xmlChar *tmp;
2948
Owen Taylor3473f882001-02-23 17:55:21 +00002949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002990 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002991 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 }
2997 if (name != NULL)
2998 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002999 if (*cur == 0)
3000 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
 * @attlen: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003038 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003042static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 int len = 0;
3047 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003071
3072 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003073 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003074 */
3075 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 while ((NXT(0) != limit) && /* checked */
3077 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003079 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003207 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003213 } else
3214 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003215 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003217
3218mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003219 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003221}
3222
3223/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003224 * xmlParseAttValue:
3225 * @ctxt: an XML parser context
3226 *
3227 * parse a value for an attribute
3228 * Note: the parser won't do substitution of entities here, this
3229 * will be handled later in xmlStringGetNodeList
3230 *
3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232 * "'" ([^<&'] | Reference)* "'"
3233 *
3234 * 3.3.3 Attribute-Value Normalization:
3235 * Before the value of an attribute is passed to the application or
3236 * checked for validity, the XML processor must normalize it as follows:
3237 * - a character reference is processed by appending the referenced
3238 * character to the attribute value
3239 * - an entity reference is processed by recursively processing the
3240 * replacement text of the entity
3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242 * appending #x20 to the normalized value, except that only a single
3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3244 * parsed entity or the literal entity value of an internal parsed entity
3245 * - other characters are processed by appending them to the normalized value
3246 * If the declared value is not CDATA, then the XML processor must further
3247 * process the normalized attribute value by discarding any leading and
3248 * trailing space (#x20) characters, and by replacing sequences of space
3249 * (#x20) characters by a single space (#x20) character.
3250 * All attributes for which no declaration has been read should be treated
3251 * by a non-validating parser as if declared CDATA.
3252 *
3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3254 */
3255
3256
3257xmlChar *
3258xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003261}
3262
3263/**
Owen Taylor3473f882001-02-23 17:55:21 +00003264 * xmlParseSystemLiteral:
3265 * @ctxt: an XML parser context
3266 *
3267 * parse an XML Literal
3268 *
3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3270 *
3271 * Returns the SystemLiteral parsed or NULL
3272 */
3273
3274xmlChar *
3275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276 xmlChar *buf = NULL;
3277 int len = 0;
3278 int size = XML_PARSER_BUFFER_SIZE;
3279 int cur, l;
3280 xmlChar stop;
3281 int state = ctxt->instate;
3282 int count = 0;
3283
3284 SHRINK;
3285 if (RAW == '"') {
3286 NEXT;
3287 stop = '"';
3288 } else if (RAW == '\'') {
3289 NEXT;
3290 stop = '\'';
3291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003293 return(NULL);
3294 }
3295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003297 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003299 return(NULL);
3300 }
3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3302 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003305 xmlChar *tmp;
3306
Owen Taylor3473f882001-02-23 17:55:21 +00003307 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3309 if (tmp == NULL) {
3310 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003312 ctxt->instate = (xmlParserInputState) state;
3313 return(NULL);
3314 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003315 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003316 }
3317 count++;
3318 if (count > 50) {
3319 GROW;
3320 count = 0;
3321 }
3322 COPY_BUF(l,buf,len,cur);
3323 NEXTL(l);
3324 cur = CUR_CHAR(l);
3325 if (cur == 0) {
3326 GROW;
3327 SHRINK;
3328 cur = CUR_CHAR(l);
3329 }
3330 }
3331 buf[len] = 0;
3332 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003333 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 } else {
3336 NEXT;
3337 }
3338 return(buf);
3339}
3340
3341/**
3342 * xmlParsePubidLiteral:
3343 * @ctxt: an XML parser context
3344 *
3345 * parse an XML public literal
3346 *
3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3348 *
3349 * Returns the PubidLiteral parsed or NULL.
3350 */
3351
3352xmlChar *
3353xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354 xmlChar *buf = NULL;
3355 int len = 0;
3356 int size = XML_PARSER_BUFFER_SIZE;
3357 xmlChar cur;
3358 xmlChar stop;
3359 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003360 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003361
3362 SHRINK;
3363 if (RAW == '"') {
3364 NEXT;
3365 stop = '"';
3366 } else if (RAW == '\'') {
3367 NEXT;
3368 stop = '\'';
3369 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003371 return(NULL);
3372 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003374 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 xmlChar *tmp;
3383
Owen Taylor3473f882001-02-23 17:55:21 +00003384 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3386 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003388 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003389 return(NULL);
3390 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003391 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 }
3393 buf[len++] = cur;
3394 count++;
3395 if (count > 50) {
3396 GROW;
3397 count = 0;
3398 }
3399 NEXT;
3400 cur = CUR;
3401 if (cur == 0) {
3402 GROW;
3403 SHRINK;
3404 cur = CUR;
3405 }
3406 }
3407 buf[len] = 0;
3408 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 } else {
3411 NEXT;
3412 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003413 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003414 return(buf);
3415}
3416
Daniel Veillard48b2f892001-02-25 16:11:03 +00003417void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003418
/*
 * Lookup table used by the inner loop of the character data scanner.
 * A non-zero entry means the byte can be skipped over as plain text;
 * a zero entry stops the scan.  The zero entries are: the control
 * characters other than TAB (so LF and CR stop the scan), '&', '<',
 * ']' and every byte with the high bit set.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3456
Owen Taylor3473f882001-02-23 17:55:21 +00003457/**
3458 * xmlParseCharData:
3459 * @ctxt: an XML parser context
3460 * @cdata: int indicating whether we are within a CDATA section
3461 *
3462 * parse a CharData section.
3463 * if we are within a CDATA section ']]>' marks an end of section.
3464 *
3465 * The right angle bracket (>) may be represented using the string "&gt;",
3466 * and must, for compatibility, be escaped using "&gt;" or a character
3467 * reference when it appears in the string "]]>" in content, when that
3468 * string is not marking the end of a CDATA section.
3469 *
3470 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3471 */
3472
3473void
3474xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003475 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003477 int line = ctxt->input->line;
3478 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003479 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003480
3481 SHRINK;
3482 GROW;
3483 /*
3484 * Accelerated common case where input don't need to be
3485 * modified before passing it to the handler.
3486 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003487 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003488 in = ctxt->input->cur;
3489 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003490get_more_space:
3491 while (*in == 0x20) in++;
3492 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003493 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003494 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003495 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003496 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003497 goto get_more_space;
3498 }
3499 if (*in == '<') {
3500 nbchar = in - ctxt->input->cur;
3501 if (nbchar > 0) {
3502 const xmlChar *tmp = ctxt->input->cur;
3503 ctxt->input->cur = in;
3504
Daniel Veillard34099b42004-11-04 17:34:35 +00003505 if ((ctxt->sax != NULL) &&
3506 (ctxt->sax->ignorableWhitespace !=
3507 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003508 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003509 if (ctxt->sax->ignorableWhitespace != NULL)
3510 ctxt->sax->ignorableWhitespace(ctxt->userData,
3511 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003512 } else {
3513 if (ctxt->sax->characters != NULL)
3514 ctxt->sax->characters(ctxt->userData,
3515 tmp, nbchar);
3516 if (*ctxt->space == -1)
3517 *ctxt->space = -2;
3518 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 } else if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003521 ctxt->sax->characters(ctxt->userData,
3522 tmp, nbchar);
3523 }
3524 }
3525 return;
3526 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003527
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003528get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003529 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003530 while (test_char_data[*in]) {
3531 in++;
3532 ccol++;
3533 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003534 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003535 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003536 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003537 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003538 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003539 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003540 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003541 }
3542 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003543 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003544 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003545 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003546 return;
3547 }
3548 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003549 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003550 goto get_more;
3551 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003552 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003553 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003554 if ((ctxt->sax != NULL) &&
3555 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003556 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003557 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003558 const xmlChar *tmp = ctxt->input->cur;
3559 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003560
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003561 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003562 if (ctxt->sax->ignorableWhitespace != NULL)
3563 ctxt->sax->ignorableWhitespace(ctxt->userData,
3564 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003565 } else {
3566 if (ctxt->sax->characters != NULL)
3567 ctxt->sax->characters(ctxt->userData,
3568 tmp, nbchar);
3569 if (*ctxt->space == -1)
3570 *ctxt->space = -2;
3571 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003572 line = ctxt->input->line;
3573 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003574 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003575 if (ctxt->sax->characters != NULL)
3576 ctxt->sax->characters(ctxt->userData,
3577 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003578 line = ctxt->input->line;
3579 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003580 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003581 }
3582 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003583 if (*in == 0xD) {
3584 in++;
3585 if (*in == 0xA) {
3586 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003587 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003588 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003589 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003590 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003591 in--;
3592 }
3593 if (*in == '<') {
3594 return;
3595 }
3596 if (*in == '&') {
3597 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003598 }
3599 SHRINK;
3600 GROW;
3601 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003603 nbchar = 0;
3604 }
Daniel Veillard50582112001-03-26 22:52:16 +00003605 ctxt->input->line = line;
3606 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003607 xmlParseCharDataComplex(ctxt, cdata);
3608}
3609
Daniel Veillard01c13b52002-12-10 15:19:08 +00003610/**
3611 * xmlParseCharDataComplex:
3612 * @ctxt: an XML parser context
3613 * @cdata: int indicating whether we are within a CDATA section
3614 *
3615 * parse a CharData section.this is the fallback function
3616 * of xmlParseCharData() when the parsing requires handling
3617 * of non-ASCII characters.
3618 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003619void
3620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3622 int nbchar = 0;
3623 int cur, l;
3624 int count = 0;
3625
3626 SHRINK;
3627 GROW;
3628 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003629 while ((cur != '<') && /* checked */
3630 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if ((cur == ']') && (NXT(1) == ']') &&
3633 (NXT(2) == '>')) {
3634 if (cdata) break;
3635 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003637 }
3638 }
3639 COPY_BUF(l,buf,nbchar,cur);
3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003641 buf[nbchar] = 0;
3642
Owen Taylor3473f882001-02-23 17:55:21 +00003643 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003644 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003645 */
3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003647 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (ctxt->sax->ignorableWhitespace != NULL)
3649 ctxt->sax->ignorableWhitespace(ctxt->userData,
3650 buf, nbchar);
3651 } else {
3652 if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003654 if ((ctxt->sax->characters !=
3655 ctxt->sax->ignorableWhitespace) &&
3656 (*ctxt->space == -1))
3657 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 }
3660 nbchar = 0;
3661 }
3662 count++;
3663 if (count > 50) {
3664 GROW;
3665 count = 0;
3666 }
3667 NEXTL(l);
3668 cur = CUR_CHAR(l);
3669 }
3670 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003671 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003673 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003674 */
3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003676 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003677 if (ctxt->sax->ignorableWhitespace != NULL)
3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683 (*ctxt->space == -1))
3684 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 }
3687 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003688 if ((cur != 0) && (!IS_CHAR(cur))) {
3689 /* Generate the error and skip the offending character */
3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691 "PCDATA invalid Char value %d\n",
3692 cur);
3693 NEXTL(l);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695}
3696
3697/**
3698 * xmlParseExternalID:
3699 * @ctxt: an XML parser context
3700 * @publicID: a xmlChar** receiving PubidLiteral
3701 * @strict: indicate whether we should restrict parsing to only
3702 * production [75], see NOTE below
3703 *
3704 * Parse an External ID or a Public ID
3705 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003706 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003707 * 'PUBLIC' S PubidLiteral S SystemLiteral
3708 *
3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3711 *
3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3713 *
3714 * Returns the function returns SystemLiteral and in the second
3715 * case publicID receives PubidLiteral, is strict is off
3716 * it is possible to return NULL and have publicID set.
3717 */
3718
3719xmlChar *
3720xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721 xmlChar *URI = NULL;
3722
3723 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003724
3725 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003727 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 SKIP_BLANKS;
3733 URI = xmlParseSystemLiteral(ctxt);
3734 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003741 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 *publicID = xmlParsePubidLiteral(ctxt);
3745 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 if (strict) {
3749 /*
3750 * We don't handle [83] so "S SystemLiteral" is required.
3751 */
William M. Brack76e95df2003-10-18 16:20:14 +00003752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003754 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 } else {
3757 /*
3758 * We handle [83] so we return immediately, if
3759 * "S SystemLiteral" is not detected. From a purely parsing
3760 * point of view that's a nice mess.
3761 */
3762 const xmlChar *ptr;
3763 GROW;
3764
3765 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767
William M. Brack76e95df2003-10-18 16:20:14 +00003768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3770 }
3771 SKIP_BLANKS;
3772 URI = xmlParseSystemLiteral(ctxt);
3773 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 }
3777 return(URI);
3778}
3779
3780/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003782 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003783 * @buf: the already parsed part of the buffer
3784 * @len: number of bytes filles in the buffer
3785 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003786 *
3787 * Skip an XML (SGML) comment <!-- .... -->
3788 * The spec says that "For compatibility, the string "--" (double-hyphen)
3789 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003790 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003791 *
3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3793 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003794static void
3795xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 int q, ql;
3797 int r, rl;
3798 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 xmlParserInputPtr input = ctxt->input;
3800 int count = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003803 len = 0;
3804 size = XML_PARSER_BUFFER_SIZE;
3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3806 if (buf == NULL) {
3807 xmlErrMemory(ctxt, NULL);
3808 return;
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
3811 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003812 if (q == 0)
3813 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003814 NEXTL(ql);
3815 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003816 if (r == 0)
3817 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003818 NEXTL(rl);
3819 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003820 if (cur == 0)
3821 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003822 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003823 ((cur != '>') ||
3824 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003825 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003826 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003827 }
3828 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003829 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003830 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003831 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3832 if (new_buf == NULL) {
3833 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003834 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003835 return;
3836 }
William M. Bracka3215c72004-07-31 16:24:01 +00003837 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 }
3839 COPY_BUF(ql,buf,len,q);
3840 q = r;
3841 ql = rl;
3842 r = cur;
3843 rl = l;
3844
3845 count++;
3846 if (count > 50) {
3847 GROW;
3848 count = 0;
3849 }
3850 NEXTL(l);
3851 cur = CUR_CHAR(l);
3852 if (cur == 0) {
3853 SHRINK;
3854 GROW;
3855 cur = CUR_CHAR(l);
3856 }
3857 }
3858 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003859 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003860 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003861 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003862 xmlFree(buf);
3863 } else {
3864 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3866 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003867 }
3868 NEXT;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3870 (!ctxt->disableSAX))
3871 ctxt->sax->comment(ctxt->userData, buf);
3872 xmlFree(buf);
3873 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003874 return;
3875not_terminated:
3876 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3877 "Comment not terminated\n", NULL);
3878 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003879}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003880/**
3881 * xmlParseComment:
3882 * @ctxt: an XML parser context
3883 *
3884 * Skip an XML (SGML) comment <!-- .... -->
3885 * The spec says that "For compatibility, the string "--" (double-hyphen)
3886 * must not occur within comments. "
3887 *
3888 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3889 */
3890void
3891xmlParseComment(xmlParserCtxtPtr ctxt) {
3892 xmlChar *buf = NULL;
3893 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003894 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003895 xmlParserInputState state;
3896 const xmlChar *in;
3897 int nbchar = 0, ccol;
3898
3899 /*
3900 * Check that there is a comment right here.
3901 */
3902 if ((RAW != '<') || (NXT(1) != '!') ||
3903 (NXT(2) != '-') || (NXT(3) != '-')) return;
3904
3905 state = ctxt->instate;
3906 ctxt->instate = XML_PARSER_COMMENT;
3907 SKIP(4);
3908 SHRINK;
3909 GROW;
3910
3911 /*
3912 * Accelerated common case where input don't need to be
3913 * modified before passing it to the handler.
3914 */
3915 in = ctxt->input->cur;
3916 do {
3917 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003918 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003919 ctxt->input->line++; ctxt->input->col = 1;
3920 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003921 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003922 }
3923get_more:
3924 ccol = ctxt->input->col;
3925 while (((*in > '-') && (*in <= 0x7F)) ||
3926 ((*in >= 0x20) && (*in < '-')) ||
3927 (*in == 0x09)) {
3928 in++;
3929 ccol++;
3930 }
3931 ctxt->input->col = ccol;
3932 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003933 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003934 ctxt->input->line++; ctxt->input->col = 1;
3935 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003936 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003937 goto get_more;
3938 }
3939 nbchar = in - ctxt->input->cur;
3940 /*
3941 * save current set of data
3942 */
3943 if (nbchar > 0) {
3944 if ((ctxt->sax != NULL) &&
3945 (ctxt->sax->comment != NULL)) {
3946 if (buf == NULL) {
3947 if ((*in == '-') && (in[1] == '-'))
3948 size = nbchar + 1;
3949 else
3950 size = XML_PARSER_BUFFER_SIZE + nbchar;
3951 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3952 if (buf == NULL) {
3953 xmlErrMemory(ctxt, NULL);
3954 ctxt->instate = state;
3955 return;
3956 }
3957 len = 0;
3958 } else if (len + nbchar + 1 >= size) {
3959 xmlChar *new_buf;
3960 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3961 new_buf = (xmlChar *) xmlRealloc(buf,
3962 size * sizeof(xmlChar));
3963 if (new_buf == NULL) {
3964 xmlFree (buf);
3965 xmlErrMemory(ctxt, NULL);
3966 ctxt->instate = state;
3967 return;
3968 }
3969 buf = new_buf;
3970 }
3971 memcpy(&buf[len], ctxt->input->cur, nbchar);
3972 len += nbchar;
3973 buf[len] = 0;
3974 }
3975 }
3976 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003977 if (*in == 0xA) {
3978 in++;
3979 ctxt->input->line++; ctxt->input->col = 1;
3980 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003981 if (*in == 0xD) {
3982 in++;
3983 if (*in == 0xA) {
3984 ctxt->input->cur = in;
3985 in++;
3986 ctxt->input->line++; ctxt->input->col = 1;
3987 continue; /* while */
3988 }
3989 in--;
3990 }
3991 SHRINK;
3992 GROW;
3993 in = ctxt->input->cur;
3994 if (*in == '-') {
3995 if (in[1] == '-') {
3996 if (in[2] == '>') {
3997 SKIP(3);
3998 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3999 (!ctxt->disableSAX)) {
4000 if (buf != NULL)
4001 ctxt->sax->comment(ctxt->userData, buf);
4002 else
4003 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4004 }
4005 if (buf != NULL)
4006 xmlFree(buf);
4007 ctxt->instate = state;
4008 return;
4009 }
4010 if (buf != NULL)
4011 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4012 "Comment not terminated \n<!--%.50s\n",
4013 buf);
4014 else
4015 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4016 "Comment not terminated \n", NULL);
4017 in++;
4018 ctxt->input->col++;
4019 }
4020 in++;
4021 ctxt->input->col++;
4022 goto get_more;
4023 }
4024 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4025 xmlParseCommentComplex(ctxt, buf, len, size);
4026 ctxt->instate = state;
4027 return;
4028}
4029
Owen Taylor3473f882001-02-23 17:55:21 +00004030
4031/**
4032 * xmlParsePITarget:
4033 * @ctxt: an XML parser context
4034 *
4035 * parse the name of a PI
4036 *
4037 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4038 *
4039 * Returns the PITarget name or NULL
4040 */
4041
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004042const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004043xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004044 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004045
4046 name = xmlParseName(ctxt);
4047 if ((name != NULL) &&
4048 ((name[0] == 'x') || (name[0] == 'X')) &&
4049 ((name[1] == 'm') || (name[1] == 'M')) &&
4050 ((name[2] == 'l') || (name[2] == 'L'))) {
4051 int i;
4052 if ((name[0] == 'x') && (name[1] == 'm') &&
4053 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004054 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004055 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004056 return(name);
4057 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004058 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004059 return(name);
4060 }
4061 for (i = 0;;i++) {
4062 if (xmlW3CPIs[i] == NULL) break;
4063 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4064 return(name);
4065 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004066 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4067 "xmlParsePITarget: invalid name prefix 'xml'\n",
4068 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
4070 return(name);
4071}
4072
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when the
 * "catalog" keyword is not followed by '=', which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* Expect: S? "catalog" S? "=" S? quoted-URL S? */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK_CH(*p))
        p++;

    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4134
Owen Taylor3473f882001-02-23 17:55:21 +00004135/**
4136 * xmlParsePI:
4137 * @ctxt: an XML parser context
4138 *
4139 * parse an XML Processing Instruction.
4140 *
4141 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4142 *
4143 * The processing is transfered to SAX once parsed.
4144 */
4145
4146void
4147xmlParsePI(xmlParserCtxtPtr ctxt) {
4148 xmlChar *buf = NULL;
4149 int len = 0;
4150 int size = XML_PARSER_BUFFER_SIZE;
4151 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004152 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004153 xmlParserInputState state;
4154 int count = 0;
4155
4156 if ((RAW == '<') && (NXT(1) == '?')) {
4157 xmlParserInputPtr input = ctxt->input;
4158 state = ctxt->instate;
4159 ctxt->instate = XML_PARSER_PI;
4160 /*
4161 * this is a Processing Instruction.
4162 */
4163 SKIP(2);
4164 SHRINK;
4165
4166 /*
4167 * Parse the target name and check for special support like
4168 * namespace.
4169 */
4170 target = xmlParsePITarget(ctxt);
4171 if (target != NULL) {
4172 if ((RAW == '?') && (NXT(1) == '>')) {
4173 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4175 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004176 }
4177 SKIP(2);
4178
4179 /*
4180 * SAX: PI detected.
4181 */
4182 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4183 (ctxt->sax->processingInstruction != NULL))
4184 ctxt->sax->processingInstruction(ctxt->userData,
4185 target, NULL);
4186 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004187 return;
4188 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004189 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004190 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004191 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004192 ctxt->instate = state;
4193 return;
4194 }
4195 cur = CUR;
4196 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004197 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4198 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
4200 SKIP_BLANKS;
4201 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004202 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004203 ((cur != '?') || (NXT(1) != '>'))) {
4204 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004205 xmlChar *tmp;
4206
Owen Taylor3473f882001-02-23 17:55:21 +00004207 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004208 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4209 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004210 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004211 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 ctxt->instate = state;
4213 return;
4214 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004215 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 count++;
4218 if (count > 50) {
4219 GROW;
4220 count = 0;
4221 }
4222 COPY_BUF(l,buf,len,cur);
4223 NEXTL(l);
4224 cur = CUR_CHAR(l);
4225 if (cur == 0) {
4226 SHRINK;
4227 GROW;
4228 cur = CUR_CHAR(l);
4229 }
4230 }
4231 buf[len] = 0;
4232 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004233 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4234 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004235 } else {
4236 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4238 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004239 }
4240 SKIP(2);
4241
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004242#ifdef LIBXML_CATALOG_ENABLED
4243 if (((state == XML_PARSER_MISC) ||
4244 (state == XML_PARSER_START)) &&
4245 (xmlStrEqual(target, XML_CATALOG_PI))) {
4246 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4247 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4248 (allow == XML_CATA_ALLOW_ALL))
4249 xmlParseCatalogPI(ctxt, buf);
4250 }
4251#endif
4252
4253
Owen Taylor3473f882001-02-23 17:55:21 +00004254 /*
4255 * SAX: PI detected.
4256 */
4257 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->processingInstruction != NULL))
4259 ctxt->sax->processingInstruction(ctxt->userData,
4260 target, buf);
4261 }
4262 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004265 }
4266 ctxt->instate = state;
4267 }
4268}
4269
4270/**
4271 * xmlParseNotationDecl:
4272 * @ctxt: an XML parser context
4273 *
4274 * parse a notation declaration
4275 *
4276 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4277 *
4278 * Hence there is actually 3 choices:
4279 * 'PUBLIC' S PubidLiteral
4280 * 'PUBLIC' S PubidLiteral S SystemLiteral
4281 * and 'SYSTEM' S SystemLiteral
4282 *
4283 * See the NOTE on xmlParseExternalID().
4284 */
4285
4286void
4287xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004288 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004289 xmlChar *Pubid;
4290 xmlChar *Systemid;
4291
Daniel Veillarda07050d2003-10-19 14:46:32 +00004292 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004293 xmlParserInputPtr input = ctxt->input;
4294 SHRINK;
4295 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004296 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4298 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004299 return;
4300 }
4301 SKIP_BLANKS;
4302
Daniel Veillard76d66f42001-05-16 21:05:17 +00004303 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004304 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004305 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 return;
4307 }
William M. Brack76e95df2003-10-18 16:20:14 +00004308 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004310 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004311 return;
4312 }
4313 SKIP_BLANKS;
4314
4315 /*
4316 * Parse the IDs.
4317 */
4318 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4319 SKIP_BLANKS;
4320
4321 if (RAW == '>') {
4322 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4324 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326 NEXT;
4327 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4328 (ctxt->sax->notationDecl != NULL))
4329 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4330 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if (Systemid != NULL) xmlFree(Systemid);
4334 if (Pubid != NULL) xmlFree(Pubid);
4335 }
4336}
4337
4338/**
4339 * xmlParseEntityDecl:
4340 * @ctxt: an XML parser context
4341 *
4342 * parse <!ENTITY declarations
4343 *
4344 * [70] EntityDecl ::= GEDecl | PEDecl
4345 *
4346 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4347 *
4348 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4349 *
4350 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4351 *
4352 * [74] PEDef ::= EntityValue | ExternalID
4353 *
4354 * [76] NDataDecl ::= S 'NDATA' S Name
4355 *
4356 * [ VC: Notation Declared ]
4357 * The Name must match the declared name of a notation.
4358 */
4359
4360void
4361xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004362 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 xmlChar *value = NULL;
4364 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004365 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 int isParameter = 0;
4367 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004368 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004369
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004371 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004372 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 SHRINK;
4374 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004375 skipped = SKIP_BLANKS;
4376 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004377 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4378 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004379 }
Owen Taylor3473f882001-02-23 17:55:21 +00004380
4381 if (RAW == '%') {
4382 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004383 skipped = SKIP_BLANKS;
4384 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4386 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004387 }
Owen Taylor3473f882001-02-23 17:55:21 +00004388 isParameter = 1;
4389 }
4390
Daniel Veillard76d66f42001-05-16 21:05:17 +00004391 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004392 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004393 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4394 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004395 return;
4396 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004397 skipped = SKIP_BLANKS;
4398 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004399 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4400 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004401 }
Owen Taylor3473f882001-02-23 17:55:21 +00004402
Daniel Veillardf5582f12002-06-11 10:08:16 +00004403 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004404 /*
4405 * handle the various case of definitions...
4406 */
4407 if (isParameter) {
4408 if ((RAW == '"') || (RAW == '\'')) {
4409 value = xmlParseEntityValue(ctxt, &orig);
4410 if (value) {
4411 if ((ctxt->sax != NULL) &&
4412 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4413 ctxt->sax->entityDecl(ctxt->userData, name,
4414 XML_INTERNAL_PARAMETER_ENTITY,
4415 NULL, NULL, value);
4416 }
4417 } else {
4418 URI = xmlParseExternalID(ctxt, &literal, 1);
4419 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004420 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004421 }
4422 if (URI) {
4423 xmlURIPtr uri;
4424
4425 uri = xmlParseURI((const char *) URI);
4426 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004427 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4428 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004429 /*
4430 * This really ought to be a well formedness error
4431 * but the XML Core WG decided otherwise c.f. issue
4432 * E26 of the XML erratas.
4433 */
Owen Taylor3473f882001-02-23 17:55:21 +00004434 } else {
4435 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004436 /*
4437 * Okay this is foolish to block those but not
4438 * invalid URIs.
4439 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004440 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004441 } else {
4442 if ((ctxt->sax != NULL) &&
4443 (!ctxt->disableSAX) &&
4444 (ctxt->sax->entityDecl != NULL))
4445 ctxt->sax->entityDecl(ctxt->userData, name,
4446 XML_EXTERNAL_PARAMETER_ENTITY,
4447 literal, URI, NULL);
4448 }
4449 xmlFreeURI(uri);
4450 }
4451 }
4452 }
4453 } else {
4454 if ((RAW == '"') || (RAW == '\'')) {
4455 value = xmlParseEntityValue(ctxt, &orig);
4456 if ((ctxt->sax != NULL) &&
4457 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4458 ctxt->sax->entityDecl(ctxt->userData, name,
4459 XML_INTERNAL_GENERAL_ENTITY,
4460 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004461 /*
4462 * For expat compatibility in SAX mode.
4463 */
4464 if ((ctxt->myDoc == NULL) ||
4465 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4466 if (ctxt->myDoc == NULL) {
4467 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4468 }
4469 if (ctxt->myDoc->intSubset == NULL)
4470 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4471 BAD_CAST "fake", NULL, NULL);
4472
Daniel Veillard1af9a412003-08-20 22:54:39 +00004473 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4474 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004475 }
Owen Taylor3473f882001-02-23 17:55:21 +00004476 } else {
4477 URI = xmlParseExternalID(ctxt, &literal, 1);
4478 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004480 }
4481 if (URI) {
4482 xmlURIPtr uri;
4483
4484 uri = xmlParseURI((const char *)URI);
4485 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4487 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004488 /*
4489 * This really ought to be a well formedness error
4490 * but the XML Core WG decided otherwise c.f. issue
4491 * E26 of the XML erratas.
4492 */
Owen Taylor3473f882001-02-23 17:55:21 +00004493 } else {
4494 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004495 /*
4496 * Okay this is foolish to block those but not
4497 * invalid URIs.
4498 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004500 }
4501 xmlFreeURI(uri);
4502 }
4503 }
William M. Brack76e95df2003-10-18 16:20:14 +00004504 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4506 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004507 }
4508 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004509 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004510 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004511 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4513 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004514 }
4515 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004516 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004517 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4518 (ctxt->sax->unparsedEntityDecl != NULL))
4519 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4520 literal, URI, ndata);
4521 } else {
4522 if ((ctxt->sax != NULL) &&
4523 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4524 ctxt->sax->entityDecl(ctxt->userData, name,
4525 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4526 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004527 /*
4528 * For expat compatibility in SAX mode.
4529 * assuming the entity repalcement was asked for
4530 */
4531 if ((ctxt->replaceEntities != 0) &&
4532 ((ctxt->myDoc == NULL) ||
4533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4534 if (ctxt->myDoc == NULL) {
4535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4536 }
4537
4538 if (ctxt->myDoc->intSubset == NULL)
4539 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4540 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004541 xmlSAX2EntityDecl(ctxt, name,
4542 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4543 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004544 }
Owen Taylor3473f882001-02-23 17:55:21 +00004545 }
4546 }
4547 }
4548 SKIP_BLANKS;
4549 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004551 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 } else {
4553 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004554 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4555 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004556 }
4557 NEXT;
4558 }
4559 if (orig != NULL) {
4560 /*
4561 * Ugly mechanism to save the raw entity value.
4562 */
4563 xmlEntityPtr cur = NULL;
4564
4565 if (isParameter) {
4566 if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->getParameterEntity != NULL))
4568 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4569 } else {
4570 if ((ctxt->sax != NULL) &&
4571 (ctxt->sax->getEntity != NULL))
4572 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004573 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004574 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004575 }
Owen Taylor3473f882001-02-23 17:55:21 +00004576 }
4577 if (cur != NULL) {
4578 if (cur->orig != NULL)
4579 xmlFree(orig);
4580 else
4581 cur->orig = orig;
4582 } else
4583 xmlFree(orig);
4584 }
Owen Taylor3473f882001-02-23 17:55:21 +00004585 if (value != NULL) xmlFree(value);
4586 if (URI != NULL) xmlFree(URI);
4587 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 }
4589}
4590
4591/**
4592 * xmlParseDefaultDecl:
4593 * @ctxt: an XML parser context
4594 * @value: Receive a possible fixed default value for the attribute
4595 *
4596 * Parse an attribute default declaration
4597 *
4598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4599 *
4600 * [ VC: Required Attribute ]
4601 * if the default declaration is the keyword #REQUIRED, then the
4602 * attribute must be specified for all elements of the type in the
4603 * attribute-list declaration.
4604 *
4605 * [ VC: Attribute Default Legal ]
4606 * The declared default value must meet the lexical constraints of
4607 * the declared attribute type c.f. xmlValidateAttributeDecl()
4608 *
4609 * [ VC: Fixed Attribute Default ]
4610 * if an attribute has a default value declared with the #FIXED
4611 * keyword, instances of that attribute must match the default value.
4612 *
4613 * [ WFC: No < in Attribute Values ]
4614 * handled in xmlParseAttValue()
4615 *
4616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4617 * or XML_ATTRIBUTE_FIXED.
4618 */
4619
4620int
4621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4622 int val;
4623 xmlChar *ret;
4624
4625 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004626 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004627 SKIP(9);
4628 return(XML_ATTRIBUTE_REQUIRED);
4629 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004630 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004631 SKIP(8);
4632 return(XML_ATTRIBUTE_IMPLIED);
4633 }
4634 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004635 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004636 SKIP(6);
4637 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004638 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4640 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 SKIP_BLANKS;
4643 }
4644 ret = xmlParseAttValue(ctxt);
4645 ctxt->instate = XML_PARSER_DTD;
4646 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004647 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004648 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 } else
4650 *value = ret;
4651 return(val);
4652}
4653
4654/**
4655 * xmlParseNotationType:
4656 * @ctxt: an XML parser context
4657 *
4658 * parse an Notation attribute type.
4659 *
4660 * Note: the leading 'NOTATION' S part has already being parsed...
4661 *
4662 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4663 *
4664 * [ VC: Notation Attributes ]
4665 * Values of this type must match one of the notation names included
4666 * in the declaration; all notation names in the declaration must be declared.
4667 *
4668 * Returns: the notation attribute tree built while parsing
4669 */
4670
4671xmlEnumerationPtr
4672xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004673 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4675
4676 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004677 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 return(NULL);
4679 }
4680 SHRINK;
4681 do {
4682 NEXT;
4683 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004684 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4687 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(ret);
4689 }
4690 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if (cur == NULL) return(ret);
4692 if (last == NULL) ret = last = cur;
4693 else {
4694 last->next = cur;
4695 last = cur;
4696 }
4697 SKIP_BLANKS;
4698 } while (RAW == '|');
4699 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 if ((last != NULL) && (last != ret))
4702 xmlFreeEnumeration(last);
4703 return(ret);
4704 }
4705 NEXT;
4706 return(ret);
4707}
4708
4709/**
4710 * xmlParseEnumerationType:
4711 * @ctxt: an XML parser context
4712 *
4713 * parse an Enumeration attribute type.
4714 *
4715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4716 *
4717 * [ VC: Enumeration ]
4718 * Values of this type must match one of the Nmtoken tokens in
4719 * the declaration
4720 *
4721 * Returns: the enumeration attribute tree built while parsing
4722 */
4723
4724xmlEnumerationPtr
4725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4726 xmlChar *name;
4727 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4728
4729 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004731 return(NULL);
4732 }
4733 SHRINK;
4734 do {
4735 NEXT;
4736 SKIP_BLANKS;
4737 name = xmlParseNmtoken(ctxt);
4738 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 return(ret);
4741 }
4742 cur = xmlCreateEnumeration(name);
4743 xmlFree(name);
4744 if (cur == NULL) return(ret);
4745 if (last == NULL) ret = last = cur;
4746 else {
4747 last->next = cur;
4748 last = cur;
4749 }
4750 SKIP_BLANKS;
4751 } while (RAW == '|');
4752 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004753 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004754 return(ret);
4755 }
4756 NEXT;
4757 return(ret);
4758}
4759
4760/**
4761 * xmlParseEnumeratedType:
4762 * @ctxt: an XML parser context
4763 * @tree: the enumeration tree built while parsing
4764 *
4765 * parse an Enumerated attribute type.
4766 *
4767 * [57] EnumeratedType ::= NotationType | Enumeration
4768 *
4769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4770 *
4771 *
4772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4773 */
4774
4775int
4776xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004777 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004778 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004779 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4781 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004782 return(0);
4783 }
4784 SKIP_BLANKS;
4785 *tree = xmlParseNotationType(ctxt);
4786 if (*tree == NULL) return(0);
4787 return(XML_ATTRIBUTE_NOTATION);
4788 }
4789 *tree = xmlParseEnumerationType(ctxt);
4790 if (*tree == NULL) return(0);
4791 return(XML_ATTRIBUTE_ENUMERATION);
4792}
4793
4794/**
4795 * xmlParseAttributeType:
4796 * @ctxt: an XML parser context
4797 * @tree: the enumeration tree built while parsing
4798 *
4799 * parse the Attribute list def for an element
4800 *
4801 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4802 *
4803 * [55] StringType ::= 'CDATA'
4804 *
4805 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4806 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4807 *
4808 * Validity constraints for attribute values syntax are checked in
4809 * xmlValidateAttributeValue()
4810 *
4811 * [ VC: ID ]
4812 * Values of type ID must match the Name production. A name must not
4813 * appear more than once in an XML document as a value of this type;
4814 * i.e., ID values must uniquely identify the elements which bear them.
4815 *
4816 * [ VC: One ID per Element Type ]
4817 * No element type may have more than one ID attribute specified.
4818 *
4819 * [ VC: ID Attribute Default ]
4820 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4821 *
4822 * [ VC: IDREF ]
4823 * Values of type IDREF must match the Name production, and values
4824 * of type IDREFS must match Names; each IDREF Name must match the value
4825 * of an ID attribute on some element in the XML document; i.e. IDREF
4826 * values must match the value of some ID attribute.
4827 *
4828 * [ VC: Entity Name ]
4829 * Values of type ENTITY must match the Name production, values
4830 * of type ENTITIES must match Names; each Entity Name must match the
4831 * name of an unparsed entity declared in the DTD.
4832 *
4833 * [ VC: Name Token ]
4834 * Values of type NMTOKEN must match the Nmtoken production; values
4835 * of type NMTOKENS must match Nmtokens.
4836 *
4837 * Returns the attribute type
4838 */
4839int
4840xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4841 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004842 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004843 SKIP(5);
4844 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004845 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004846 SKIP(6);
4847 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004848 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004849 SKIP(5);
4850 return(XML_ATTRIBUTE_IDREF);
4851 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4852 SKIP(2);
4853 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004854 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004855 SKIP(6);
4856 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004857 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004858 SKIP(8);
4859 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004860 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004861 SKIP(8);
4862 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004863 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004864 SKIP(7);
4865 return(XML_ATTRIBUTE_NMTOKEN);
4866 }
4867 return(xmlParseEnumeratedType(ctxt, tree));
4868}
4869
4870/**
4871 * xmlParseAttributeListDecl:
4872 * @ctxt: an XML parser context
4873 *
4874 * : parse the Attribute list def for an element
4875 *
4876 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4877 *
4878 * [53] AttDef ::= S Name S AttType S DefaultDecl
4879 *
4880 */
4881void
4882xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004883 const xmlChar *elemName;
4884 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004885 xmlEnumerationPtr tree;
4886
Daniel Veillarda07050d2003-10-19 14:46:32 +00004887 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004888 xmlParserInputPtr input = ctxt->input;
4889
4890 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004891 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004893 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 }
4895 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004896 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004897 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4899 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004900 return;
4901 }
4902 SKIP_BLANKS;
4903 GROW;
4904 while (RAW != '>') {
4905 const xmlChar *check = CUR_PTR;
4906 int type;
4907 int def;
4908 xmlChar *defaultValue = NULL;
4909
4910 GROW;
4911 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004912 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004913 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4915 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004916 break;
4917 }
4918 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004921 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004922 break;
4923 }
4924 SKIP_BLANKS;
4925
4926 type = xmlParseAttributeType(ctxt, &tree);
4927 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004928 break;
4929 }
4930
4931 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004932 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4934 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004935 if (tree != NULL)
4936 xmlFreeEnumeration(tree);
4937 break;
4938 }
4939 SKIP_BLANKS;
4940
4941 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4942 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if (defaultValue != NULL)
4944 xmlFree(defaultValue);
4945 if (tree != NULL)
4946 xmlFreeEnumeration(tree);
4947 break;
4948 }
4949
4950 GROW;
4951 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004952 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004954 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004955 if (defaultValue != NULL)
4956 xmlFree(defaultValue);
4957 if (tree != NULL)
4958 xmlFreeEnumeration(tree);
4959 break;
4960 }
4961 SKIP_BLANKS;
4962 }
4963 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004964 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4965 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004966 if (defaultValue != NULL)
4967 xmlFree(defaultValue);
4968 if (tree != NULL)
4969 xmlFreeEnumeration(tree);
4970 break;
4971 }
4972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4973 (ctxt->sax->attributeDecl != NULL))
4974 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4975 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004976 else if (tree != NULL)
4977 xmlFreeEnumeration(tree);
4978
4979 if ((ctxt->sax2) && (defaultValue != NULL) &&
4980 (def != XML_ATTRIBUTE_IMPLIED) &&
4981 (def != XML_ATTRIBUTE_REQUIRED)) {
4982 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4983 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004984 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4985 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4986 }
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (defaultValue != NULL)
4988 xmlFree(defaultValue);
4989 GROW;
4990 }
4991 if (RAW == '>') {
4992 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004993 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4994 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004995 }
4996 NEXT;
4997 }
Owen Taylor3473f882001-02-23 17:55:21 +00004998 }
4999}
5000
5001/**
5002 * xmlParseElementMixedContentDecl:
5003 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005004 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005005 *
5006 * parse the declaration for a Mixed Element content
5007 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5008 *
5009 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5010 * '(' S? '#PCDATA' S? ')'
5011 *
5012 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5013 *
5014 * [ VC: No Duplicate Types ]
5015 * The same name must not appear more than once in a single
5016 * mixed-content declaration.
5017 *
5018 * returns: the list of the xmlElementContentPtr describing the element choices
5019 */
5020xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005021xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005023 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005024
5025 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005026 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005027 SKIP(7);
5028 SKIP_BLANKS;
5029 SHRINK;
5030 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005031 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005032 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5033"Element content declaration doesn't start and stop in the same entity\n",
5034 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005035 }
Owen Taylor3473f882001-02-23 17:55:21 +00005036 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005037 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005038 if (RAW == '*') {
5039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5040 NEXT;
5041 }
5042 return(ret);
5043 }
5044 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005045 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005046 if (ret == NULL) return(NULL);
5047 }
5048 while (RAW == '|') {
5049 NEXT;
5050 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005051 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 if (ret == NULL) return(NULL);
5053 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005054 if (cur != NULL)
5055 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005056 cur = ret;
5057 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005058 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005059 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005060 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005061 if (n->c1 != NULL)
5062 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005063 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005064 if (n != NULL)
5065 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005066 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 }
5068 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005069 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005071 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005072 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005073 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005074 return(NULL);
5075 }
5076 SKIP_BLANKS;
5077 GROW;
5078 }
5079 if ((RAW == ')') && (NXT(1) == '*')) {
5080 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005081 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005082 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005083 if (cur->c2 != NULL)
5084 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005085 }
5086 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005088 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5089"Element content declaration doesn't start and stop in the same entity\n",
5090 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 SKIP(2);
5093 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005094 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005095 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005096 return(NULL);
5097 }
5098
5099 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005100 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005101 }
5102 return(ret);
5103}
5104
5105/**
5106 * xmlParseElementChildrenContentDecl:
5107 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005108 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005109 *
5110 * parse the declaration for a Mixed Element content
5111 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5112 *
5113 *
5114 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5115 *
5116 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5117 *
5118 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5119 *
5120 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5121 *
5122 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5123 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005124 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005125 * opening or closing parentheses in a choice, seq, or Mixed
5126 * construct is contained in the replacement text for a parameter
5127 * entity, both must be contained in the same replacement text. For
5128 * interoperability, if a parameter-entity reference appears in a
5129 * choice, seq, or Mixed construct, its replacement text should not
5130 * be empty, and neither the first nor last non-blank character of
5131 * the replacement text should be a connector (| or ,).
5132 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005133 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005134 * hierarchy.
5135 */
5136xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005137xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005138 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005139 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005140 xmlChar type = 0;
5141
5142 SKIP_BLANKS;
5143 GROW;
5144 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005146
Owen Taylor3473f882001-02-23 17:55:21 +00005147 /* Recurse on first child */
5148 NEXT;
5149 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005150 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005151 SKIP_BLANKS;
5152 GROW;
5153 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005156 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 return(NULL);
5158 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005159 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005160 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005161 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005162 return(NULL);
5163 }
Owen Taylor3473f882001-02-23 17:55:21 +00005164 GROW;
5165 if (RAW == '?') {
5166 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5167 NEXT;
5168 } else if (RAW == '*') {
5169 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5170 NEXT;
5171 } else if (RAW == '+') {
5172 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5173 NEXT;
5174 } else {
5175 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5176 }
Owen Taylor3473f882001-02-23 17:55:21 +00005177 GROW;
5178 }
5179 SKIP_BLANKS;
5180 SHRINK;
5181 while (RAW != ')') {
5182 /*
5183 * Each loop we parse one separator and one element.
5184 */
5185 if (RAW == ',') {
5186 if (type == 0) type = CUR;
5187
5188 /*
5189 * Detect "Name | Name , Name" error
5190 */
5191 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005193 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005194 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005195 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005196 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005198 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005199 return(NULL);
5200 }
5201 NEXT;
5202
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005203 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005205 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005206 xmlFreeDocElementContent(ctxt->myDoc, last);
5207 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005208 return(NULL);
5209 }
5210 if (last == NULL) {
5211 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005212 if (ret != NULL)
5213 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005214 ret = cur = op;
5215 } else {
5216 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005217 if (op != NULL)
5218 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005219 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005220 if (last != NULL)
5221 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005222 cur =op;
5223 last = NULL;
5224 }
5225 } else if (RAW == '|') {
5226 if (type == 0) type = CUR;
5227
5228 /*
5229 * Detect "Name , Name | Name" error
5230 */
5231 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005232 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005233 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005234 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005235 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005238 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005239 return(NULL);
5240 }
5241 NEXT;
5242
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005243 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005245 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005246 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005248 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005249 return(NULL);
5250 }
5251 if (last == NULL) {
5252 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005253 if (ret != NULL)
5254 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005255 ret = cur = op;
5256 } else {
5257 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005258 if (op != NULL)
5259 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005260 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005261 if (last != NULL)
5262 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005263 cur =op;
5264 last = NULL;
5265 }
5266 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005267 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005269 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return(NULL);
5271 }
5272 GROW;
5273 SKIP_BLANKS;
5274 GROW;
5275 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005276 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 /* Recurse on second child */
5278 NEXT;
5279 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005280 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005281 SKIP_BLANKS;
5282 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005283 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005285 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005287 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005288 return(NULL);
5289 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005290 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005291 if (RAW == '?') {
5292 last->ocur = XML_ELEMENT_CONTENT_OPT;
5293 NEXT;
5294 } else if (RAW == '*') {
5295 last->ocur = XML_ELEMENT_CONTENT_MULT;
5296 NEXT;
5297 } else if (RAW == '+') {
5298 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5299 NEXT;
5300 } else {
5301 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5302 }
5303 }
5304 SKIP_BLANKS;
5305 GROW;
5306 }
5307 if ((cur != NULL) && (last != NULL)) {
5308 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005309 if (last != NULL)
5310 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005311 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005312 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005313 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5314"Element content declaration doesn't start and stop in the same entity\n",
5315 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005316 }
Owen Taylor3473f882001-02-23 17:55:21 +00005317 NEXT;
5318 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005319 if (ret != NULL) {
5320 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5321 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5322 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5323 else
5324 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5325 }
Owen Taylor3473f882001-02-23 17:55:21 +00005326 NEXT;
5327 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005328 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005329 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005330 cur = ret;
5331 /*
5332 * Some normalization:
5333 * (a | b* | c?)* == (a | b | c)*
5334 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005335 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005336 if ((cur->c1 != NULL) &&
5337 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5338 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5339 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5340 if ((cur->c2 != NULL) &&
5341 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5342 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5343 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5344 cur = cur->c2;
5345 }
5346 }
Owen Taylor3473f882001-02-23 17:55:21 +00005347 NEXT;
5348 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005349 if (ret != NULL) {
5350 int found = 0;
5351
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005352 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005355 else
5356 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005357 /*
5358 * Some normalization:
5359 * (a | b*)+ == (a | b)*
5360 * (a | b?)+ == (a | b)*
5361 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005363 if ((cur->c1 != NULL) &&
5364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5367 found = 1;
5368 }
5369 if ((cur->c2 != NULL) &&
5370 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5371 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5372 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5373 found = 1;
5374 }
5375 cur = cur->c2;
5376 }
5377 if (found)
5378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5379 }
Owen Taylor3473f882001-02-23 17:55:21 +00005380 NEXT;
5381 }
5382 return(ret);
5383}
5384
5385/**
5386 * xmlParseElementContentDecl:
5387 * @ctxt: an XML parser context
5388 * @name: the name of the element being defined.
5389 * @result: the Element Content pointer will be stored here if any
5390 *
5391 * parse the declaration for an Element content either Mixed or Children,
5392 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5393 *
5394 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5395 *
5396 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5397 */
5398
5399int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005400xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005401 xmlElementContentPtr *result) {
5402
5403 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005404 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005405 int res;
5406
5407 *result = NULL;
5408
5409 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005410 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005411 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 return(-1);
5413 }
5414 NEXT;
5415 GROW;
5416 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005417 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005418 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005419 res = XML_ELEMENT_TYPE_MIXED;
5420 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005421 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005422 res = XML_ELEMENT_TYPE_ELEMENT;
5423 }
Owen Taylor3473f882001-02-23 17:55:21 +00005424 SKIP_BLANKS;
5425 *result = tree;
5426 return(res);
5427}
5428
5429/**
5430 * xmlParseElementDecl:
5431 * @ctxt: an XML parser context
5432 *
5433 * parse an Element declaration.
5434 *
5435 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5436 *
5437 * [ VC: Unique Element Type Declaration ]
5438 * No element type may be declared more than once
5439 *
5440 * Returns the type of the element, or -1 in case of error
5441 */
5442int
5443xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005444 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005445 int ret = -1;
5446 xmlElementContentPtr content = NULL;
5447
Daniel Veillard4c778d82005-01-23 17:37:44 +00005448 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005449 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005450 xmlParserInputPtr input = ctxt->input;
5451
5452 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005453 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005456 }
5457 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005458 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5461 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005462 return(-1);
5463 }
5464 while ((RAW == 0) && (ctxt->inputNr > 1))
5465 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005466 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005469 }
5470 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005471 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005472 SKIP(5);
5473 /*
5474 * Element must always be empty.
5475 */
5476 ret = XML_ELEMENT_TYPE_EMPTY;
5477 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5478 (NXT(2) == 'Y')) {
5479 SKIP(3);
5480 /*
5481 * Element is a generic container.
5482 */
5483 ret = XML_ELEMENT_TYPE_ANY;
5484 } else if (RAW == '(') {
5485 ret = xmlParseElementContentDecl(ctxt, name, &content);
5486 } else {
5487 /*
5488 * [ WFC: PEs in Internal Subset ] error handling.
5489 */
5490 if ((RAW == '%') && (ctxt->external == 0) &&
5491 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005492 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005493 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005494 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005495 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005496 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5497 }
Owen Taylor3473f882001-02-23 17:55:21 +00005498 return(-1);
5499 }
5500
5501 SKIP_BLANKS;
5502 /*
5503 * Pop-up of finished entities.
5504 */
5505 while ((RAW == 0) && (ctxt->inputNr > 1))
5506 xmlPopInput(ctxt);
5507 SKIP_BLANKS;
5508
5509 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005510 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005511 if (content != NULL) {
5512 xmlFreeDocElementContent(ctxt->myDoc, content);
5513 }
Owen Taylor3473f882001-02-23 17:55:21 +00005514 } else {
5515 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005516 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5517 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519
5520 NEXT;
5521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005522 (ctxt->sax->elementDecl != NULL)) {
5523 if (content != NULL)
5524 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5526 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005527 if ((content != NULL) && (content->parent == NULL)) {
5528 /*
5529 * this is a trick: if xmlAddElementDecl is called,
5530 * instead of copying the full tree it is plugged directly
5531 * if called from the parser. Avoid duplicating the
5532 * interfaces or change the API/ABI
5533 */
5534 xmlFreeDocElementContent(ctxt->myDoc, content);
5535 }
5536 } else if (content != NULL) {
5537 xmlFreeDocElementContent(ctxt->myDoc, content);
5538 }
Owen Taylor3473f882001-02-23 17:55:21 +00005539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
5541 return(ret);
5542}
5543
5544/**
Owen Taylor3473f882001-02-23 17:55:21 +00005545 * xmlParseConditionalSections
5546 * @ctxt: an XML parser context
5547 *
5548 * [61] conditionalSect ::= includeSect | ignoreSect
5549 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5550 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5551 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5552 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5553 */
5554
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005555static void
Owen Taylor3473f882001-02-23 17:55:21 +00005556xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5557 SKIP(3);
5558 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005559 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005560 SKIP(7);
5561 SKIP_BLANKS;
5562 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 } else {
5565 NEXT;
5566 }
5567 if (xmlParserDebugEntities) {
5568 if ((ctxt->input != NULL) && (ctxt->input->filename))
5569 xmlGenericError(xmlGenericErrorContext,
5570 "%s(%d): ", ctxt->input->filename,
5571 ctxt->input->line);
5572 xmlGenericError(xmlGenericErrorContext,
5573 "Entering INCLUDE Conditional Section\n");
5574 }
5575
5576 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5577 (NXT(2) != '>'))) {
5578 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005579 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005580
5581 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5582 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005583 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005584 NEXT;
5585 } else if (RAW == '%') {
5586 xmlParsePEReference(ctxt);
5587 } else
5588 xmlParseMarkupDecl(ctxt);
5589
5590 /*
5591 * Pop-up of finished entities.
5592 */
5593 while ((RAW == 0) && (ctxt->inputNr > 1))
5594 xmlPopInput(ctxt);
5595
Daniel Veillardfdc91562002-07-01 21:52:03 +00005596 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005597 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005598 break;
5599 }
5600 }
5601 if (xmlParserDebugEntities) {
5602 if ((ctxt->input != NULL) && (ctxt->input->filename))
5603 xmlGenericError(xmlGenericErrorContext,
5604 "%s(%d): ", ctxt->input->filename,
5605 ctxt->input->line);
5606 xmlGenericError(xmlGenericErrorContext,
5607 "Leaving INCLUDE Conditional Section\n");
5608 }
5609
Daniel Veillarda07050d2003-10-19 14:46:32 +00005610 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005611 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005612 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005613 int depth = 0;
5614
5615 SKIP(6);
5616 SKIP_BLANKS;
5617 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005618 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005619 } else {
5620 NEXT;
5621 }
5622 if (xmlParserDebugEntities) {
5623 if ((ctxt->input != NULL) && (ctxt->input->filename))
5624 xmlGenericError(xmlGenericErrorContext,
5625 "%s(%d): ", ctxt->input->filename,
5626 ctxt->input->line);
5627 xmlGenericError(xmlGenericErrorContext,
5628 "Entering IGNORE Conditional Section\n");
5629 }
5630
5631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005632 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005633 * But disable SAX event generating DTD building in the meantime
5634 */
5635 state = ctxt->disableSAX;
5636 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005637 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005638 ctxt->instate = XML_PARSER_IGNORE;
5639
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005640 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5642 depth++;
5643 SKIP(3);
5644 continue;
5645 }
5646 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5647 if (--depth >= 0) SKIP(3);
5648 continue;
5649 }
5650 NEXT;
5651 continue;
5652 }
5653
5654 ctxt->disableSAX = state;
5655 ctxt->instate = instate;
5656
5657 if (xmlParserDebugEntities) {
5658 if ((ctxt->input != NULL) && (ctxt->input->filename))
5659 xmlGenericError(xmlGenericErrorContext,
5660 "%s(%d): ", ctxt->input->filename,
5661 ctxt->input->line);
5662 xmlGenericError(xmlGenericErrorContext,
5663 "Leaving IGNORE Conditional Section\n");
5664 }
5665
5666 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005667 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669
5670 if (RAW == 0)
5671 SHRINK;
5672
5673 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005674 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005675 } else {
5676 SKIP(3);
5677 }
5678}
5679
5680/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005681 * xmlParseMarkupDecl:
5682 * @ctxt: an XML parser context
5683 *
5684 * parse Markup declarations
5685 *
5686 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5687 * NotationDecl | PI | Comment
5688 *
5689 * [ VC: Proper Declaration/PE Nesting ]
5690 * Parameter-entity replacement text must be properly nested with
5691 * markup declarations. That is to say, if either the first character
5692 * or the last character of a markup declaration (markupdecl above) is
5693 * contained in the replacement text for a parameter-entity reference,
5694 * both must be contained in the same replacement text.
5695 *
5696 * [ WFC: PEs in Internal Subset ]
5697 * In the internal DTD subset, parameter-entity references can occur
5698 * only where markup declarations can occur, not within markup declarations.
5699 * (This does not apply to references that occur in external parameter
5700 * entities or to the external subset.)
5701 */
5702void
5703xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5704 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005705 if (CUR == '<') {
5706 if (NXT(1) == '!') {
5707 switch (NXT(2)) {
5708 case 'E':
5709 if (NXT(3) == 'L')
5710 xmlParseElementDecl(ctxt);
5711 else if (NXT(3) == 'N')
5712 xmlParseEntityDecl(ctxt);
5713 break;
5714 case 'A':
5715 xmlParseAttributeListDecl(ctxt);
5716 break;
5717 case 'N':
5718 xmlParseNotationDecl(ctxt);
5719 break;
5720 case '-':
5721 xmlParseComment(ctxt);
5722 break;
5723 default:
5724 /* there is an error but it will be detected later */
5725 break;
5726 }
5727 } else if (NXT(1) == '?') {
5728 xmlParsePI(ctxt);
5729 }
5730 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005731 /*
5732 * This is only for internal subset. On external entities,
5733 * the replacement is done before parsing stage
5734 */
5735 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5736 xmlParsePEReference(ctxt);
5737
5738 /*
5739 * Conditional sections are allowed from entities included
5740 * by PE References in the internal subset.
5741 */
5742 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5744 xmlParseConditionalSections(ctxt);
5745 }
5746 }
5747
5748 ctxt->instate = XML_PARSER_DTD;
5749}
5750
5751/**
5752 * xmlParseTextDecl:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse an XML declaration header for external entities
5756 *
5757 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5758 *
5759 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5760 */
5761
5762void
5763xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5764 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005765 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005766
5767 /*
5768 * We know that '<?xml' is here.
5769 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005770 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005771 SKIP(5);
5772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005774 return;
5775 }
5776
William M. Brack76e95df2003-10-18 16:20:14 +00005777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005780 }
5781 SKIP_BLANKS;
5782
5783 /*
5784 * We may have the VersionInfo here.
5785 */
5786 version = xmlParseVersionInfo(ctxt);
5787 if (version == NULL)
5788 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005789 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005790 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005793 }
5794 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005795 ctxt->input->version = version;
5796
5797 /*
5798 * We must have the encoding declaration
5799 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005800 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005801 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5802 /*
5803 * The XML REC instructs us to stop parsing right here
5804 */
5805 return;
5806 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005807 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5808 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5809 "Missing encoding in text declaration\n");
5810 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005811
5812 SKIP_BLANKS;
5813 if ((RAW == '?') && (NXT(1) == '>')) {
5814 SKIP(2);
5815 } else if (RAW == '>') {
5816 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005817 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005818 NEXT;
5819 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005820 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005821 MOVETO_ENDTAG(CUR_PTR);
5822 NEXT;
5823 }
5824}
5825
5826/**
Owen Taylor3473f882001-02-23 17:55:21 +00005827 * xmlParseExternalSubset:
5828 * @ctxt: an XML parser context
5829 * @ExternalID: the external identifier
5830 * @SystemID: the system identifier (or URL)
5831 *
5832 * parse Markup declarations from an external subset
5833 *
5834 * [30] extSubset ::= textDecl? extSubsetDecl
5835 *
5836 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5837 */
5838void
5839xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5840 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005841 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005842 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005843 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005844 xmlParseTextDecl(ctxt);
5845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5846 /*
5847 * The XML REC instructs us to stop parsing right here
5848 */
5849 ctxt->instate = XML_PARSER_EOF;
5850 return;
5851 }
5852 }
5853 if (ctxt->myDoc == NULL) {
5854 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5855 }
5856 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5857 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5858
5859 ctxt->instate = XML_PARSER_DTD;
5860 ctxt->external = 1;
5861 while (((RAW == '<') && (NXT(1) == '?')) ||
5862 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005863 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005864 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005865 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005866
5867 GROW;
5868 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5869 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005870 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005871 NEXT;
5872 } else if (RAW == '%') {
5873 xmlParsePEReference(ctxt);
5874 } else
5875 xmlParseMarkupDecl(ctxt);
5876
5877 /*
5878 * Pop-up of finished entities.
5879 */
5880 while ((RAW == 0) && (ctxt->inputNr > 1))
5881 xmlPopInput(ctxt);
5882
Daniel Veillardfdc91562002-07-01 21:52:03 +00005883 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005884 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 break;
5886 }
5887 }
5888
5889 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005890 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 }
5892
5893}
5894
5895/**
5896 * xmlParseReference:
5897 * @ctxt: an XML parser context
5898 *
5899 * parse and handle entity references in content, depending on the SAX
5900 * interface, this may end-up in a call to character() if this is a
5901 * CharRef, a predefined entity, if there is no reference() callback.
5902 * or if the parser was asked to switch to that mode.
5903 *
5904 * [67] Reference ::= EntityRef | CharRef
5905 */
5906void
5907xmlParseReference(xmlParserCtxtPtr ctxt) {
5908 xmlEntityPtr ent;
5909 xmlChar *val;
5910 if (RAW != '&') return;
5911
5912 if (NXT(1) == '#') {
5913 int i = 0;
5914 xmlChar out[10];
5915 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005916 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005917
5918 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5919 /*
5920 * So we are using non-UTF-8 buffers
5921 * Check that the char fit on 8bits, if not
5922 * generate a CharRef.
5923 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005924 if (value <= 0xFF) {
5925 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005926 out[1] = 0;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5928 (!ctxt->disableSAX))
5929 ctxt->sax->characters(ctxt->userData, out, 1);
5930 } else {
5931 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005932 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005934 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5936 (!ctxt->disableSAX))
5937 ctxt->sax->reference(ctxt->userData, out);
5938 }
5939 } else {
5940 /*
5941 * Just encode the value in UTF-8
5942 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005943 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005944 out[i] = 0;
5945 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5946 (!ctxt->disableSAX))
5947 ctxt->sax->characters(ctxt->userData, out, i);
5948 }
5949 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005950 int was_checked;
5951
Owen Taylor3473f882001-02-23 17:55:21 +00005952 ent = xmlParseEntityRef(ctxt);
5953 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005954 if (!ctxt->wellFormed)
5955 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005956 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005957 if ((ent->name != NULL) &&
5958 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5959 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005960 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005961
5962
5963 /*
5964 * The first reference to the entity trigger a parsing phase
5965 * where the ent->children is filled with the result from
5966 * the parsing.
5967 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005968 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005969 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005970
Owen Taylor3473f882001-02-23 17:55:21 +00005971 value = ent->content;
5972
5973 /*
5974 * Check that this entity is well formed
5975 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005976 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005977 (value[1] == 0) && (value[0] == '<') &&
5978 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5979 /*
5980 * DONE: get definite answer on this !!!
5981 * Lots of entity decls are used to declare a single
5982 * char
5983 * <!ENTITY lt "<">
5984 * Which seems to be valid since
5985 * 2.4: The ampersand character (&) and the left angle
5986 * bracket (<) may appear in their literal form only
5987 * when used ... They are also legal within the literal
5988 * entity value of an internal entity declaration;i
5989 * see "4.3.2 Well-Formed Parsed Entities".
5990 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5991 * Looking at the OASIS test suite and James Clark
5992 * tests, this is broken. However the XML REC uses
5993 * it. Is the XML REC not well-formed ????
5994 * This is a hack to avoid this problem
5995 *
5996 * ANSWER: since lt gt amp .. are already defined,
5997 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005998 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005999 * is lousy but acceptable.
6000 */
6001 list = xmlNewDocText(ctxt->myDoc, value);
6002 if (list != NULL) {
6003 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6004 (ent->children == NULL)) {
6005 ent->children = list;
6006 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006007 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006008 list->parent = (xmlNodePtr) ent;
6009 } else {
6010 xmlFreeNodeList(list);
6011 }
6012 } else if (list != NULL) {
6013 xmlFreeNodeList(list);
6014 }
6015 } else {
6016 /*
6017 * 4.3.2: An internal general parsed entity is well-formed
6018 * if its replacement text matches the production labeled
6019 * content.
6020 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006021
6022 void *user_data;
6023 /*
6024 * This is a bit hackish but this seems the best
6025 * way to make sure both SAX and DOM entity support
6026 * behaves okay.
6027 */
6028 if (ctxt->userData == ctxt)
6029 user_data = NULL;
6030 else
6031 user_data = ctxt->userData;
6032
Owen Taylor3473f882001-02-23 17:55:21 +00006033 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6034 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006035 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6036 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006037 ctxt->depth--;
6038 } else if (ent->etype ==
6039 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6040 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006041 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006042 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006043 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006044 ctxt->depth--;
6045 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006046 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006047 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6048 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006049 }
6050 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006051 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006052 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006053 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006054 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6055 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006056 (ent->children == NULL)) {
6057 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006058 if (ctxt->replaceEntities) {
6059 /*
6060 * Prune it directly in the generated document
6061 * except for single text nodes.
6062 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006063 if (((list->type == XML_TEXT_NODE) &&
6064 (list->next == NULL)) ||
6065 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006066 list->parent = (xmlNodePtr) ent;
6067 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006068 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006069 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006070 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006071 while (list != NULL) {
6072 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006073 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006074 if (list->next == NULL)
6075 ent->last = list;
6076 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006077 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006078 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006079#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006080 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6081 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006082#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006083 }
6084 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006085 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006086 while (list != NULL) {
6087 list->parent = (xmlNodePtr) ent;
6088 if (list->next == NULL)
6089 ent->last = list;
6090 list = list->next;
6091 }
Owen Taylor3473f882001-02-23 17:55:21 +00006092 }
6093 } else {
6094 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006095 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006096 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006097 } else if ((ret != XML_ERR_OK) &&
6098 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006099 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 } else if (list != NULL) {
6101 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006102 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006105 ent->checked = 1;
6106 }
6107
6108 if (ent->children == NULL) {
6109 /*
6110 * Probably running in SAX mode and the callbacks don't
6111 * build the entity content. So unless we already went
6112 * though parsing for first checking go though the entity
6113 * content to generate callbacks associated to the entity
6114 */
6115 if (was_checked == 1) {
6116 void *user_data;
6117 /*
6118 * This is a bit hackish but this seems the best
6119 * way to make sure both SAX and DOM entity support
6120 * behaves okay.
6121 */
6122 if (ctxt->userData == ctxt)
6123 user_data = NULL;
6124 else
6125 user_data = ctxt->userData;
6126
6127 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6128 ctxt->depth++;
6129 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6130 ent->content, user_data, NULL);
6131 ctxt->depth--;
6132 } else if (ent->etype ==
6133 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6134 ctxt->depth++;
6135 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6136 ctxt->sax, user_data, ctxt->depth,
6137 ent->URI, ent->ExternalID, NULL);
6138 ctxt->depth--;
6139 } else {
6140 ret = XML_ERR_ENTITY_PE_INTERNAL;
6141 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6142 "invalid entity type found\n", NULL);
6143 }
6144 if (ret == XML_ERR_ENTITY_LOOP) {
6145 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6146 return;
6147 }
6148 }
6149 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6150 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6151 /*
6152 * Entity reference callback comes second, it's somewhat
6153 * superfluous but a compatibility to historical behaviour
6154 */
6155 ctxt->sax->reference(ctxt->userData, ent->name);
6156 }
6157 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 }
6159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006160 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006161 /*
6162 * Create a node.
6163 */
6164 ctxt->sax->reference(ctxt->userData, ent->name);
6165 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006166 }
6167 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006168 /*
6169 * There is a problem on the handling of _private for entities
6170 * (bug 155816): Should we copy the content of the field from
6171 * the entity (possibly overwriting some value set by the user
6172 * when a copy is created), should we leave it alone, or should
6173 * we try to take care of different situations? The problem
6174 * is exacerbated by the usage of this field by the xmlReader.
6175 * To fix this bug, we look at _private on the created node
6176 * and, if it's NULL, we copy in whatever was in the entity.
6177 * If it's not NULL we leave it alone. This is somewhat of a
6178 * hack - maybe we should have further tests to determine
6179 * what to do.
6180 */
Owen Taylor3473f882001-02-23 17:55:21 +00006181 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6182 /*
6183 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006184 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006185 * In the first occurrence list contains the replacement.
6186 * progressive == 2 means we are operating on the Reader
6187 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006188 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006189 if (((list == NULL) && (ent->owner == 0)) ||
6190 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006191 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006192
6193 /*
6194 * when operating on a reader, the entities definitions
6195 * are always owning the entities subtree.
6196 if (ctxt->parseMode == XML_PARSE_READER)
6197 ent->owner = 1;
6198 */
6199
Daniel Veillard62f313b2001-07-04 19:49:14 +00006200 cur = ent->children;
6201 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006203 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006204 if (nw->_private == NULL)
6205 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006206 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006207 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006208 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006209 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006210 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006211 if (cur == ent->last) {
6212 /*
6213 * needed to detect some strange empty
6214 * node cases in the reader tests
6215 */
6216 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006217 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006218 (nw->type == XML_ELEMENT_NODE) &&
6219 (nw->children == NULL))
6220 nw->extra = 1;
6221
Daniel Veillard62f313b2001-07-04 19:49:14 +00006222 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006223 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006224 cur = cur->next;
6225 }
Daniel Veillard81273902003-09-30 00:43:48 +00006226#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006227 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006228 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006229#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006230 } else if (list == NULL) {
6231 xmlNodePtr nw = NULL, cur, next, last,
6232 firstChild = NULL;
6233 /*
6234 * Copy the entity child list and make it the new
6235 * entity child list. The goal is to make sure any
6236 * ID or REF referenced will be the one from the
6237 * document content and not the entity copy.
6238 */
6239 cur = ent->children;
6240 ent->children = NULL;
6241 last = ent->last;
6242 ent->last = NULL;
6243 while (cur != NULL) {
6244 next = cur->next;
6245 cur->next = NULL;
6246 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006247 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006248 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006249 if (nw->_private == NULL)
6250 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006251 if (firstChild == NULL){
6252 firstChild = cur;
6253 }
6254 xmlAddChild((xmlNodePtr) ent, nw);
6255 xmlAddChild(ctxt->node, cur);
6256 }
6257 if (cur == last)
6258 break;
6259 cur = next;
6260 }
6261 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006262#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6264 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006265#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006266 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006267 const xmlChar *nbktext;
6268
Daniel Veillard62f313b2001-07-04 19:49:14 +00006269 /*
6270 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006271 * node with a possible previous text one which
6272 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006273 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006274 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6275 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006277 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006278 if ((ent->last != ent->children) &&
6279 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006280 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006281 xmlAddChildList(ctxt->node, ent->children);
6282 }
6283
Owen Taylor3473f882001-02-23 17:55:21 +00006284 /*
6285 * This is to avoid a nasty side effect, see
6286 * characters() in SAX.c
6287 */
6288 ctxt->nodemem = 0;
6289 ctxt->nodelen = 0;
6290 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006291 }
6292 }
6293 } else {
6294 val = ent->content;
6295 if (val == NULL) return;
6296 /*
6297 * inline the entity.
6298 */
6299 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6300 (!ctxt->disableSAX))
6301 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6302 }
6303 }
6304}
6305
6306/**
6307 * xmlParseEntityRef:
6308 * @ctxt: an XML parser context
6309 *
6310 * parse ENTITY references declarations
6311 *
6312 * [68] EntityRef ::= '&' Name ';'
6313 *
6314 * [ WFC: Entity Declared ]
6315 * In a document without any DTD, a document with only an internal DTD
6316 * subset which contains no parameter entity references, or a document
6317 * with "standalone='yes'", the Name given in the entity reference
6318 * must match that in an entity declaration, except that well-formed
6319 * documents need not declare any of the following entities: amp, lt,
6320 * gt, apos, quot. The declaration of a parameter entity must precede
6321 * any reference to it. Similarly, the declaration of a general entity
6322 * must precede any reference to it which appears in a default value in an
6323 * attribute-list declaration. Note that if entities are declared in the
6324 * external subset or in external parameter entities, a non-validating
6325 * processor is not obligated to read and process their declarations;
6326 * for such documents, the rule that an entity must be declared is a
6327 * well-formedness constraint only if standalone='yes'.
6328 *
6329 * [ WFC: Parsed Entity ]
6330 * An entity reference must not contain the name of an unparsed entity
6331 *
6332 * Returns the xmlEntityPtr if found, or NULL otherwise.
6333 */
6334xmlEntityPtr
6335xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 xmlEntityPtr ent = NULL;
6338
6339 GROW;
6340
6341 if (RAW == '&') {
6342 NEXT;
6343 name = xmlParseName(ctxt);
6344 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006345 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6346 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006347 } else {
6348 if (RAW == ';') {
6349 NEXT;
6350 /*
6351 * Ask first SAX for entity resolution, otherwise try the
6352 * predefined set.
6353 */
6354 if (ctxt->sax != NULL) {
6355 if (ctxt->sax->getEntity != NULL)
6356 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006357 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006358 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006359 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6360 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006361 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006362 }
Owen Taylor3473f882001-02-23 17:55:21 +00006363 }
6364 /*
6365 * [ WFC: Entity Declared ]
6366 * In a document without any DTD, a document with only an
6367 * internal DTD subset which contains no parameter entity
6368 * references, or a document with "standalone='yes'", the
6369 * Name given in the entity reference must match that in an
6370 * entity declaration, except that well-formed documents
6371 * need not declare any of the following entities: amp, lt,
6372 * gt, apos, quot.
6373 * The declaration of a parameter entity must precede any
6374 * reference to it.
6375 * Similarly, the declaration of a general entity must
6376 * precede any reference to it which appears in a default
6377 * value in an attribute-list declaration. Note that if
6378 * entities are declared in the external subset or in
6379 * external parameter entities, a non-validating processor
6380 * is not obligated to read and process their declarations;
6381 * for such documents, the rule that an entity must be
6382 * declared is a well-formedness constraint only if
6383 * standalone='yes'.
6384 */
6385 if (ent == NULL) {
6386 if ((ctxt->standalone == 1) ||
6387 ((ctxt->hasExternalSubset == 0) &&
6388 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006389 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006390 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006391 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006392 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006393 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006394 if ((ctxt->inSubset == 0) &&
6395 (ctxt->sax != NULL) &&
6396 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006397 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006398 }
Owen Taylor3473f882001-02-23 17:55:21 +00006399 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
6402
6403 /*
6404 * [ WFC: Parsed Entity ]
6405 * An entity reference must not contain the name of an
6406 * unparsed entity
6407 */
6408 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006409 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006410 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006411 }
6412
6413 /*
6414 * [ WFC: No External Entity References ]
6415 * Attribute values cannot contain direct or indirect
6416 * entity references to external entities.
6417 */
6418 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6419 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006420 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6421 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
6423 /*
6424 * [ WFC: No < in Attribute Values ]
6425 * The replacement text of any entity referred to directly or
6426 * indirectly in an attribute value (other than "&lt;") must
6427 * not contain a <.
6428 */
6429 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6430 (ent != NULL) &&
6431 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6432 (ent->content != NULL) &&
6433 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006434 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006435 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006436 }
6437
6438 /*
6439 * Internal check, no parameter entities here ...
6440 */
6441 else {
6442 switch (ent->etype) {
6443 case XML_INTERNAL_PARAMETER_ENTITY:
6444 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006445 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6446 "Attempt to reference the parameter entity '%s'\n",
6447 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006448 break;
6449 default:
6450 break;
6451 }
6452 }
6453
6454 /*
6455 * [ WFC: No Recursion ]
6456 * A parsed entity must not contain a recursive reference
6457 * to itself, either directly or indirectly.
6458 * Done somewhere else
6459 */
6460
6461 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006462 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 }
6465 }
6466 return(ent);
6467}
6468
6469/**
6470 * xmlParseStringEntityRef:
6471 * @ctxt: an XML parser context
6472 * @str: a pointer to an index in the string
6473 *
6474 * parse ENTITY references declarations, but this version parses it from
6475 * a string value.
6476 *
6477 * [68] EntityRef ::= '&' Name ';'
6478 *
6479 * [ WFC: Entity Declared ]
6480 * In a document without any DTD, a document with only an internal DTD
6481 * subset which contains no parameter entity references, or a document
6482 * with "standalone='yes'", the Name given in the entity reference
6483 * must match that in an entity declaration, except that well-formed
6484 * documents need not declare any of the following entities: amp, lt,
6485 * gt, apos, quot. The declaration of a parameter entity must precede
6486 * any reference to it. Similarly, the declaration of a general entity
6487 * must precede any reference to it which appears in a default value in an
6488 * attribute-list declaration. Note that if entities are declared in the
6489 * external subset or in external parameter entities, a non-validating
6490 * processor is not obligated to read and process their declarations;
6491 * for such documents, the rule that an entity must be declared is a
6492 * well-formedness constraint only if standalone='yes'.
6493 *
6494 * [ WFC: Parsed Entity ]
6495 * An entity reference must not contain the name of an unparsed entity
6496 *
6497 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6498 * is updated to the current location in the string.
6499 */
6500xmlEntityPtr
6501xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6502 xmlChar *name;
6503 const xmlChar *ptr;
6504 xmlChar cur;
6505 xmlEntityPtr ent = NULL;
6506
6507 if ((str == NULL) || (*str == NULL))
6508 return(NULL);
6509 ptr = *str;
6510 cur = *ptr;
6511 if (cur == '&') {
6512 ptr++;
6513 cur = *ptr;
6514 name = xmlParseStringName(ctxt, &ptr);
6515 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6517 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006518 } else {
6519 if (*ptr == ';') {
6520 ptr++;
6521 /*
6522 * Ask first SAX for entity resolution, otherwise try the
6523 * predefined set.
6524 */
6525 if (ctxt->sax != NULL) {
6526 if (ctxt->sax->getEntity != NULL)
6527 ent = ctxt->sax->getEntity(ctxt->userData, name);
6528 if (ent == NULL)
6529 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006530 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006531 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006532 }
Owen Taylor3473f882001-02-23 17:55:21 +00006533 }
6534 /*
6535 * [ WFC: Entity Declared ]
6536 * In a document without any DTD, a document with only an
6537 * internal DTD subset which contains no parameter entity
6538 * references, or a document with "standalone='yes'", the
6539 * Name given in the entity reference must match that in an
6540 * entity declaration, except that well-formed documents
6541 * need not declare any of the following entities: amp, lt,
6542 * gt, apos, quot.
6543 * The declaration of a parameter entity must precede any
6544 * reference to it.
6545 * Similarly, the declaration of a general entity must
6546 * precede any reference to it which appears in a default
6547 * value in an attribute-list declaration. Note that if
6548 * entities are declared in the external subset or in
6549 * external parameter entities, a non-validating processor
6550 * is not obligated to read and process their declarations;
6551 * for such documents, the rule that an entity must be
6552 * declared is a well-formedness constraint only if
6553 * standalone='yes'.
6554 */
6555 if (ent == NULL) {
6556 if ((ctxt->standalone == 1) ||
6557 ((ctxt->hasExternalSubset == 0) &&
6558 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006559 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006560 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006563 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006564 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006566 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568
6569 /*
6570 * [ WFC: Parsed Entity ]
6571 * An entity reference must not contain the name of an
6572 * unparsed entity
6573 */
6574 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006575 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006576 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 }
6578
6579 /*
6580 * [ WFC: No External Entity References ]
6581 * Attribute values cannot contain direct or indirect
6582 * entity references to external entities.
6583 */
6584 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006586 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006587 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589 /*
6590 * [ WFC: No < in Attribute Values ]
6591 * The replacement text of any entity referred to directly or
6592 * indirectly in an attribute value (other than "&lt;") must
6593 * not contain a <.
6594 */
6595 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6596 (ent != NULL) &&
6597 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6598 (ent->content != NULL) &&
6599 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006600 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6601 "'<' in entity '%s' is not allowed in attributes values\n",
6602 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604
6605 /*
6606 * Internal check, no parameter entities here ...
6607 */
6608 else {
6609 switch (ent->etype) {
6610 case XML_INTERNAL_PARAMETER_ENTITY:
6611 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006612 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6613 "Attempt to reference the parameter entity '%s'\n",
6614 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006615 break;
6616 default:
6617 break;
6618 }
6619 }
6620
6621 /*
6622 * [ WFC: No Recursion ]
6623 * A parsed entity must not contain a recursive reference
6624 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006625 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006626 */
6627
6628 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006629 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006630 }
6631 xmlFree(name);
6632 }
6633 }
6634 *str = ptr;
6635 return(ent);
6636}
6637
6638/**
6639 * xmlParsePEReference:
6640 * @ctxt: an XML parser context
6641 *
6642 * parse PEReference declarations
6643 * The entity content is handled directly by pushing it's content as
6644 * a new input stream.
6645 *
6646 * [69] PEReference ::= '%' Name ';'
6647 *
6648 * [ WFC: No Recursion ]
6649 * A parsed entity must not contain a recursive
6650 * reference to itself, either directly or indirectly.
6651 *
6652 * [ WFC: Entity Declared ]
6653 * In a document without any DTD, a document with only an internal DTD
6654 * subset which contains no parameter entity references, or a document
6655 * with "standalone='yes'", ... ... The declaration of a parameter
6656 * entity must precede any reference to it...
6657 *
6658 * [ VC: Entity Declared ]
6659 * In a document with an external subset or external parameter entities
6660 * with "standalone='no'", ... ... The declaration of a parameter entity
6661 * must precede any reference to it...
6662 *
6663 * [ WFC: In DTD ]
6664 * Parameter-entity references may only appear in the DTD.
6665 * NOTE: misleading but this is handled.
6666 */
6667void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006668xmlParsePEReference(xmlParserCtxtPtr ctxt)
6669{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006670 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006671 xmlEntityPtr entity = NULL;
6672 xmlParserInputPtr input;
6673
6674 if (RAW == '%') {
6675 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006676 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006677 if (name == NULL) {
6678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6679 "xmlParsePEReference: no name\n");
6680 } else {
6681 if (RAW == ';') {
6682 NEXT;
6683 if ((ctxt->sax != NULL) &&
6684 (ctxt->sax->getParameterEntity != NULL))
6685 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6686 name);
6687 if (entity == NULL) {
6688 /*
6689 * [ WFC: Entity Declared ]
6690 * In a document without any DTD, a document with only an
6691 * internal DTD subset which contains no parameter entity
6692 * references, or a document with "standalone='yes'", ...
6693 * ... The declaration of a parameter entity must precede
6694 * any reference to it...
6695 */
6696 if ((ctxt->standalone == 1) ||
6697 ((ctxt->hasExternalSubset == 0) &&
6698 (ctxt->hasPErefs == 0))) {
6699 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6700 "PEReference: %%%s; not found\n",
6701 name);
6702 } else {
6703 /*
6704 * [ VC: Entity Declared ]
6705 * In a document with an external subset or external
6706 * parameter entities with "standalone='no'", ...
6707 * ... The declaration of a parameter entity must
6708 * precede any reference to it...
6709 */
6710 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6711 "PEReference: %%%s; not found\n",
6712 name, NULL);
6713 ctxt->valid = 0;
6714 }
6715 } else {
6716 /*
6717 * Internal checking in case the entity quest barfed
6718 */
6719 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6720 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6721 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6722 "Internal: %%%s; is not a parameter entity\n",
6723 name, NULL);
6724 } else if (ctxt->input->free != deallocblankswrapper) {
6725 input =
6726 xmlNewBlanksWrapperInputStream(ctxt, entity);
6727 xmlPushInput(ctxt, input);
6728 } else {
6729 /*
6730 * TODO !!!
6731 * handle the extra spaces added before and after
6732 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6733 */
6734 input = xmlNewEntityInputStream(ctxt, entity);
6735 xmlPushInput(ctxt, input);
6736 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006737 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006738 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006739 xmlParseTextDecl(ctxt);
6740 if (ctxt->errNo ==
6741 XML_ERR_UNSUPPORTED_ENCODING) {
6742 /*
6743 * The XML REC instructs us to stop parsing
6744 * right here
6745 */
6746 ctxt->instate = XML_PARSER_EOF;
6747 return;
6748 }
6749 }
6750 }
6751 }
6752 ctxt->hasPErefs = 1;
6753 } else {
6754 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6755 }
6756 }
Owen Taylor3473f882001-02-23 17:55:21 +00006757 }
6758}
6759
6760/**
6761 * xmlParseStringPEReference:
6762 * @ctxt: an XML parser context
6763 * @str: a pointer to an index in the string
6764 *
6765 * parse PEReference declarations
6766 *
6767 * [69] PEReference ::= '%' Name ';'
6768 *
6769 * [ WFC: No Recursion ]
6770 * A parsed entity must not contain a recursive
6771 * reference to itself, either directly or indirectly.
6772 *
6773 * [ WFC: Entity Declared ]
6774 * In a document without any DTD, a document with only an internal DTD
6775 * subset which contains no parameter entity references, or a document
6776 * with "standalone='yes'", ... ... The declaration of a parameter
6777 * entity must precede any reference to it...
6778 *
6779 * [ VC: Entity Declared ]
6780 * In a document with an external subset or external parameter entities
6781 * with "standalone='no'", ... ... The declaration of a parameter entity
6782 * must precede any reference to it...
6783 *
6784 * [ WFC: In DTD ]
6785 * Parameter-entity references may only appear in the DTD.
6786 * NOTE: misleading but this is handled.
6787 *
6788 * Returns the string of the entity content.
6789 * str is updated to the current value of the index
6790 */
6791xmlEntityPtr
6792xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6793 const xmlChar *ptr;
6794 xmlChar cur;
6795 xmlChar *name;
6796 xmlEntityPtr entity = NULL;
6797
6798 if ((str == NULL) || (*str == NULL)) return(NULL);
6799 ptr = *str;
6800 cur = *ptr;
6801 if (cur == '%') {
6802 ptr++;
6803 cur = *ptr;
6804 name = xmlParseStringName(ctxt, &ptr);
6805 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6807 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006808 } else {
6809 cur = *ptr;
6810 if (cur == ';') {
6811 ptr++;
6812 cur = *ptr;
6813 if ((ctxt->sax != NULL) &&
6814 (ctxt->sax->getParameterEntity != NULL))
6815 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6816 name);
6817 if (entity == NULL) {
6818 /*
6819 * [ WFC: Entity Declared ]
6820 * In a document without any DTD, a document with only an
6821 * internal DTD subset which contains no parameter entity
6822 * references, or a document with "standalone='yes'", ...
6823 * ... The declaration of a parameter entity must precede
6824 * any reference to it...
6825 */
6826 if ((ctxt->standalone == 1) ||
6827 ((ctxt->hasExternalSubset == 0) &&
6828 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006829 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006830 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 } else {
6832 /*
6833 * [ VC: Entity Declared ]
6834 * In a document with an external subset or external
6835 * parameter entities with "standalone='no'", ...
6836 * ... The declaration of a parameter entity must
6837 * precede any reference to it...
6838 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6840 "PEReference: %%%s; not found\n",
6841 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006842 ctxt->valid = 0;
6843 }
6844 } else {
6845 /*
6846 * Internal checking in case the entity quest barfed
6847 */
6848 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6849 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006850 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6851 "%%%s; is not a parameter entity\n",
6852 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006853 }
6854 }
6855 ctxt->hasPErefs = 1;
6856 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006857 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859 xmlFree(name);
6860 }
6861 }
6862 *str = ptr;
6863 return(entity);
6864}
6865
6866/**
6867 * xmlParseDocTypeDecl:
6868 * @ctxt: an XML parser context
6869 *
6870 * parse a DOCTYPE declaration
6871 *
6872 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6873 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6874 *
6875 * [ VC: Root Element Type ]
6876 * The Name in the document type declaration must match the element
6877 * type of the root element.
6878 */
6879
6880void
6881xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006882 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 xmlChar *ExternalID = NULL;
6884 xmlChar *URI = NULL;
6885
6886 /*
6887 * We know that '<!DOCTYPE' has been detected.
6888 */
6889 SKIP(9);
6890
6891 SKIP_BLANKS;
6892
6893 /*
6894 * Parse the DOCTYPE name.
6895 */
6896 name = xmlParseName(ctxt);
6897 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6899 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006900 }
6901 ctxt->intSubName = name;
6902
6903 SKIP_BLANKS;
6904
6905 /*
6906 * Check for SystemID and ExternalID
6907 */
6908 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6909
6910 if ((URI != NULL) || (ExternalID != NULL)) {
6911 ctxt->hasExternalSubset = 1;
6912 }
6913 ctxt->extSubURI = URI;
6914 ctxt->extSubSystem = ExternalID;
6915
6916 SKIP_BLANKS;
6917
6918 /*
6919 * Create and update the internal subset.
6920 */
6921 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6922 (!ctxt->disableSAX))
6923 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6924
6925 /*
6926 * Is there any internal subset declarations ?
6927 * they are handled separately in xmlParseInternalSubset()
6928 */
6929 if (RAW == '[')
6930 return;
6931
6932 /*
6933 * We should be at the end of the DOCTYPE declaration.
6934 */
6935 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006936 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006937 }
6938 NEXT;
6939}
6940
6941/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006942 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006943 * @ctxt: an XML parser context
6944 *
6945 * parse the internal subset declaration
6946 *
6947 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6948 */
6949
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006950static void
Owen Taylor3473f882001-02-23 17:55:21 +00006951xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6952 /*
6953 * Is there any DTD definition ?
6954 */
6955 if (RAW == '[') {
6956 ctxt->instate = XML_PARSER_DTD;
6957 NEXT;
6958 /*
6959 * Parse the succession of Markup declarations and
6960 * PEReferences.
6961 * Subsequence (markupdecl | PEReference | S)*
6962 */
6963 while (RAW != ']') {
6964 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006965 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006966
6967 SKIP_BLANKS;
6968 xmlParseMarkupDecl(ctxt);
6969 xmlParsePEReference(ctxt);
6970
6971 /*
6972 * Pop-up of finished entities.
6973 */
6974 while ((RAW == 0) && (ctxt->inputNr > 1))
6975 xmlPopInput(ctxt);
6976
6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006978 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006979 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006980 break;
6981 }
6982 }
6983 if (RAW == ']') {
6984 NEXT;
6985 SKIP_BLANKS;
6986 }
6987 }
6988
6989 /*
6990 * We should be at the end of the DOCTYPE declaration.
6991 */
6992 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006993 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006994 }
6995 NEXT;
6996}
6997
Daniel Veillard81273902003-09-30 00:43:48 +00006998#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006999/**
7000 * xmlParseAttribute:
7001 * @ctxt: an XML parser context
7002 * @value: a xmlChar ** used to store the value of the attribute
7003 *
7004 * parse an attribute
7005 *
7006 * [41] Attribute ::= Name Eq AttValue
7007 *
7008 * [ WFC: No External Entity References ]
7009 * Attribute values cannot contain direct or indirect entity references
7010 * to external entities.
7011 *
7012 * [ WFC: No < in Attribute Values ]
7013 * The replacement text of any entity referred to directly or indirectly in
7014 * an attribute value (other than "&lt;") must not contain a <.
7015 *
7016 * [ VC: Attribute Value Type ]
7017 * The attribute must have been declared; the value must be of the type
7018 * declared for it.
7019 *
7020 * [25] Eq ::= S? '=' S?
7021 *
7022 * With namespace:
7023 *
7024 * [NS 11] Attribute ::= QName Eq AttValue
7025 *
7026 * Also the case QName == xmlns:??? is handled independently as a namespace
7027 * definition.
7028 *
7029 * Returns the attribute name, and the value in *value.
7030 */
7031
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007032const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007033xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007034 const xmlChar *name;
7035 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007036
7037 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007038 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007039 name = xmlParseName(ctxt);
7040 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007042 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007043 return(NULL);
7044 }
7045
7046 /*
7047 * read the value
7048 */
7049 SKIP_BLANKS;
7050 if (RAW == '=') {
7051 NEXT;
7052 SKIP_BLANKS;
7053 val = xmlParseAttValue(ctxt);
7054 ctxt->instate = XML_PARSER_CONTENT;
7055 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007056 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007057 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007058 return(NULL);
7059 }
7060
7061 /*
7062 * Check that xml:lang conforms to the specification
7063 * No more registered as an error, just generate a warning now
7064 * since this was deprecated in XML second edition
7065 */
7066 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7067 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007068 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7069 "Malformed value for xml:lang : %s\n",
7070 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007071 }
7072 }
7073
7074 /*
7075 * Check that xml:space conforms to the specification
7076 */
7077 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7078 if (xmlStrEqual(val, BAD_CAST "default"))
7079 *(ctxt->space) = 0;
7080 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7081 *(ctxt->space) = 1;
7082 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007083 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007084"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007085 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007086 }
7087 }
7088
7089 *value = val;
7090 return(name);
7091}
7092
7093/**
7094 * xmlParseStartTag:
7095 * @ctxt: an XML parser context
7096 *
7097 * parse a start of tag either for rule element or
7098 * EmptyElement. In both case we don't parse the tag closing chars.
7099 *
7100 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7101 *
7102 * [ WFC: Unique Att Spec ]
7103 * No attribute name may appear more than once in the same start-tag or
7104 * empty-element tag.
7105 *
7106 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7107 *
7108 * [ WFC: Unique Att Spec ]
7109 * No attribute name may appear more than once in the same start-tag or
7110 * empty-element tag.
7111 *
7112 * With namespace:
7113 *
7114 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7115 *
7116 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7117 *
7118 * Returns the element name parsed
7119 */
7120
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007121const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007122xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007123 const xmlChar *name;
7124 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007125 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007126 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007128 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007129 int i;
7130
7131 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007132 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007133
7134 name = xmlParseName(ctxt);
7135 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007137 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007138 return(NULL);
7139 }
7140
7141 /*
7142 * Now parse the attributes, it ends up with the ending
7143 *
7144 * (S Attribute)* S?
7145 */
7146 SKIP_BLANKS;
7147 GROW;
7148
Daniel Veillard21a0f912001-02-25 19:54:14 +00007149 while ((RAW != '>') &&
7150 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007151 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007152 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007153 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007154
7155 attname = xmlParseAttribute(ctxt, &attvalue);
7156 if ((attname != NULL) && (attvalue != NULL)) {
7157 /*
7158 * [ WFC: Unique Att Spec ]
7159 * No attribute name may appear more than once in the same
7160 * start-tag or empty-element tag.
7161 */
7162 for (i = 0; i < nbatts;i += 2) {
7163 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007164 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 xmlFree(attvalue);
7166 goto failed;
7167 }
7168 }
Owen Taylor3473f882001-02-23 17:55:21 +00007169 /*
7170 * Add the pair to atts
7171 */
7172 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007173 maxatts = 22; /* allow for 10 attrs by default */
7174 atts = (const xmlChar **)
7175 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007176 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007177 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007178 if (attvalue != NULL)
7179 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007180 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007181 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007182 ctxt->atts = atts;
7183 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007184 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007185 const xmlChar **n;
7186
Owen Taylor3473f882001-02-23 17:55:21 +00007187 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007188 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007189 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007190 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007191 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 if (attvalue != NULL)
7193 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007194 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007195 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007196 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007197 ctxt->atts = atts;
7198 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007199 }
7200 atts[nbatts++] = attname;
7201 atts[nbatts++] = attvalue;
7202 atts[nbatts] = NULL;
7203 atts[nbatts + 1] = NULL;
7204 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007205 if (attvalue != NULL)
7206 xmlFree(attvalue);
7207 }
7208
7209failed:
7210
Daniel Veillard3772de32002-12-17 10:31:45 +00007211 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007212 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7213 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007214 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7216 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007217 }
7218 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007219 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7220 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007221 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7222 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007223 break;
7224 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007225 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007226 GROW;
7227 }
7228
7229 /*
7230 * SAX: Start of Element !
7231 */
7232 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007233 (!ctxt->disableSAX)) {
7234 if (nbatts > 0)
7235 ctxt->sax->startElement(ctxt->userData, name, atts);
7236 else
7237 ctxt->sax->startElement(ctxt->userData, name, NULL);
7238 }
Owen Taylor3473f882001-02-23 17:55:21 +00007239
7240 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007241 /* Free only the content strings */
7242 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007243 if (atts[i] != NULL)
7244 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007245 }
7246 return(name);
7247}
7248
7249/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007251 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252 * @line: line of the start tag
7253 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007254 *
7255 * parse an end of tag
7256 *
7257 * [42] ETag ::= '</' Name S? '>'
7258 *
7259 * With namespace
7260 *
7261 * [NS 9] ETag ::= '</' QName S? '>'
7262 */
7263
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007264static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007265xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007266 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007267
7268 GROW;
7269 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007270 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007271 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007272 return;
7273 }
7274 SKIP(2);
7275
Daniel Veillard46de64e2002-05-29 08:21:33 +00007276 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007277
7278 /*
7279 * We should definitely be at the ending "S? '>'" part
7280 */
7281 GROW;
7282 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007283 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007284 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007285 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007286 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007287
7288 /*
7289 * [ WFC: Element Type Match ]
7290 * The Name in an element's end-tag must match the element type in the
7291 * start-tag.
7292 *
7293 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007294 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007295 if (name == NULL) name = BAD_CAST "unparseable";
7296 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007297 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007298 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007299 }
7300
7301 /*
7302 * SAX: End of Tag
7303 */
7304 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7305 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007306 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007307
Daniel Veillarde57ec792003-09-10 10:50:59 +00007308 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007309 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007310 return;
7311}
7312
7313/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007314 * xmlParseEndTag:
7315 * @ctxt: an XML parser context
7316 *
7317 * parse an end of tag
7318 *
7319 * [42] ETag ::= '</' Name S? '>'
7320 *
7321 * With namespace
7322 *
7323 * [NS 9] ETag ::= '</' QName S? '>'
7324 */
7325
7326void
7327xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 xmlParseEndTag1(ctxt, 0);
7329}
Daniel Veillard81273902003-09-30 00:43:48 +00007330#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331
7332/************************************************************************
7333 * *
7334 * SAX 2 specific operations *
7335 * *
7336 ************************************************************************/
7337
7338static const xmlChar *
7339xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7340 int len = 0, l;
7341 int c;
7342 int count = 0;
7343
7344 /*
7345 * Handler for more complex cases
7346 */
7347 GROW;
7348 c = CUR_CHAR(l);
7349 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007350 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007351 return(NULL);
7352 }
7353
7354 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007355 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007356 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007357 (IS_COMBINING(c)) ||
7358 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007359 if (count++ > 100) {
7360 count = 0;
7361 GROW;
7362 }
7363 len += l;
7364 NEXTL(l);
7365 c = CUR_CHAR(l);
7366 }
7367 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7368}
7369
7370/*
7371 * xmlGetNamespace:
7372 * @ctxt: an XML parser context
7373 * @prefix: the prefix to lookup
7374 *
7375 * Lookup the namespace name for the @prefix (which ca be NULL)
7376 * The prefix must come from the @ctxt->dict dictionnary
7377 *
7378 * Returns the namespace name or NULL if not bound
7379 */
7380static const xmlChar *
7381xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7382 int i;
7383
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007385 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 if (ctxt->nsTab[i] == prefix) {
7387 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7388 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007390 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 return(NULL);
7392}
7393
7394/**
7395 * xmlParseNCName:
7396 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007397 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 *
7399 * parse an XML name.
7400 *
7401 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7402 * CombiningChar | Extender
7403 *
7404 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7405 *
7406 * Returns the Name parsed or NULL
7407 */
7408
7409static const xmlChar *
7410xmlParseNCName(xmlParserCtxtPtr ctxt) {
7411 const xmlChar *in;
7412 const xmlChar *ret;
7413 int count = 0;
7414
7415 /*
7416 * Accelerator for simple ASCII names
7417 */
7418 in = ctxt->input->cur;
7419 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7420 ((*in >= 0x41) && (*in <= 0x5A)) ||
7421 (*in == '_')) {
7422 in++;
7423 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7424 ((*in >= 0x41) && (*in <= 0x5A)) ||
7425 ((*in >= 0x30) && (*in <= 0x39)) ||
7426 (*in == '_') || (*in == '-') ||
7427 (*in == '.'))
7428 in++;
7429 if ((*in > 0) && (*in < 0x80)) {
7430 count = in - ctxt->input->cur;
7431 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7432 ctxt->input->cur = in;
7433 ctxt->nbChars += count;
7434 ctxt->input->col += count;
7435 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007436 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007437 }
7438 return(ret);
7439 }
7440 }
7441 return(xmlParseNCNameComplex(ctxt));
7442}
7443
7444/**
7445 * xmlParseQName:
7446 * @ctxt: an XML parser context
7447 * @prefix: pointer to store the prefix part
7448 *
7449 * parse an XML Namespace QName
7450 *
7451 * [6] QName ::= (Prefix ':')? LocalPart
7452 * [7] Prefix ::= NCName
7453 * [8] LocalPart ::= NCName
7454 *
7455 * Returns the Name parsed or NULL
7456 */
7457
7458static const xmlChar *
7459xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7460 const xmlChar *l, *p;
7461
7462 GROW;
7463
7464 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007465 if (l == NULL) {
7466 if (CUR == ':') {
7467 l = xmlParseName(ctxt);
7468 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7470 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007471 *prefix = NULL;
7472 return(l);
7473 }
7474 }
7475 return(NULL);
7476 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477 if (CUR == ':') {
7478 NEXT;
7479 p = l;
7480 l = xmlParseNCName(ctxt);
7481 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007482 xmlChar *tmp;
7483
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007484 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7485 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007486 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7487 p = xmlDictLookup(ctxt->dict, tmp, -1);
7488 if (tmp != NULL) xmlFree(tmp);
7489 *prefix = NULL;
7490 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007491 }
7492 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007493 xmlChar *tmp;
7494
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007495 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7496 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007497 NEXT;
7498 tmp = (xmlChar *) xmlParseName(ctxt);
7499 if (tmp != NULL) {
7500 tmp = xmlBuildQName(tmp, l, NULL, 0);
7501 l = xmlDictLookup(ctxt->dict, tmp, -1);
7502 if (tmp != NULL) xmlFree(tmp);
7503 *prefix = p;
7504 return(l);
7505 }
7506 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7507 l = xmlDictLookup(ctxt->dict, tmp, -1);
7508 if (tmp != NULL) xmlFree(tmp);
7509 *prefix = p;
7510 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511 }
7512 *prefix = p;
7513 } else
7514 *prefix = NULL;
7515 return(l);
7516}
7517
7518/**
7519 * xmlParseQNameAndCompare:
7520 * @ctxt: an XML parser context
7521 * @name: the localname
7522 * @prefix: the prefix, if any.
7523 *
7524 * parse an XML name and compares for match
7525 * (specialized for endtag parsing)
7526 *
7527 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7528 * and the name for mismatch
7529 */
7530
7531static const xmlChar *
7532xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7533 xmlChar const *prefix) {
7534 const xmlChar *cmp = name;
7535 const xmlChar *in;
7536 const xmlChar *ret;
7537 const xmlChar *prefix2;
7538
7539 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7540
7541 GROW;
7542 in = ctxt->input->cur;
7543
7544 cmp = prefix;
7545 while (*in != 0 && *in == *cmp) {
7546 ++in;
7547 ++cmp;
7548 }
7549 if ((*cmp == 0) && (*in == ':')) {
7550 in++;
7551 cmp = name;
7552 while (*in != 0 && *in == *cmp) {
7553 ++in;
7554 ++cmp;
7555 }
William M. Brack76e95df2003-10-18 16:20:14 +00007556 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 /* success */
7558 ctxt->input->cur = in;
7559 return((const xmlChar*) 1);
7560 }
7561 }
7562 /*
7563 * all strings coms from the dictionary, equality can be done directly
7564 */
7565 ret = xmlParseQName (ctxt, &prefix2);
7566 if ((ret == name) && (prefix == prefix2))
7567 return((const xmlChar*) 1);
7568 return ret;
7569}
7570
7571/**
7572 * xmlParseAttValueInternal:
7573 * @ctxt: an XML parser context
7574 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007575 * @alloc: whether the attribute was reallocated as a new string
7576 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577 *
7578 * parse a value for an attribute.
7579 * NOTE: if no normalization is needed, the routine will return pointers
7580 * directly from the data buffer.
7581 *
7582 * 3.3.3 Attribute-Value Normalization:
7583 * Before the value of an attribute is passed to the application or
7584 * checked for validity, the XML processor must normalize it as follows:
7585 * - a character reference is processed by appending the referenced
7586 * character to the attribute value
7587 * - an entity reference is processed by recursively processing the
7588 * replacement text of the entity
7589 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7590 * appending #x20 to the normalized value, except that only a single
7591 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7592 * parsed entity or the literal entity value of an internal parsed entity
7593 * - other characters are processed by appending them to the normalized value
7594 * If the declared value is not CDATA, then the XML processor must further
7595 * process the normalized attribute value by discarding any leading and
7596 * trailing space (#x20) characters, and by replacing sequences of space
7597 * (#x20) characters by a single space (#x20) character.
7598 * All attributes for which no declaration has been read should be treated
7599 * by a non-validating parser as if declared CDATA.
7600 *
7601 * Returns the AttValue parsed or NULL. The value has to be freed by the
7602 * caller if it was copied, this can be detected by val[*len] == 0.
7603 */
7604
7605static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007606xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7607 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007608{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007610 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 xmlChar *ret = NULL;
7612
7613 GROW;
7614 in = (xmlChar *) CUR_PTR;
7615 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007616 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007617 return (NULL);
7618 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007619 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007620
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007621 /*
7622 * try to handle in this routine the most common case where no
7623 * allocation of a new string is required and where content is
7624 * pure ASCII.
7625 */
7626 limit = *in++;
7627 end = ctxt->input->end;
7628 start = in;
7629 if (in >= end) {
7630 const xmlChar *oldbase = ctxt->input->base;
7631 GROW;
7632 if (oldbase != ctxt->input->base) {
7633 long delta = ctxt->input->base - oldbase;
7634 start = start + delta;
7635 in = in + delta;
7636 }
7637 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007639 if (normalize) {
7640 /*
7641 * Skip any leading spaces
7642 */
7643 while ((in < end) && (*in != limit) &&
7644 ((*in == 0x20) || (*in == 0x9) ||
7645 (*in == 0xA) || (*in == 0xD))) {
7646 in++;
7647 start = in;
7648 if (in >= end) {
7649 const xmlChar *oldbase = ctxt->input->base;
7650 GROW;
7651 if (oldbase != ctxt->input->base) {
7652 long delta = ctxt->input->base - oldbase;
7653 start = start + delta;
7654 in = in + delta;
7655 }
7656 end = ctxt->input->end;
7657 }
7658 }
7659 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7660 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7661 if ((*in++ == 0x20) && (*in == 0x20)) break;
7662 if (in >= end) {
7663 const xmlChar *oldbase = ctxt->input->base;
7664 GROW;
7665 if (oldbase != ctxt->input->base) {
7666 long delta = ctxt->input->base - oldbase;
7667 start = start + delta;
7668 in = in + delta;
7669 }
7670 end = ctxt->input->end;
7671 }
7672 }
7673 last = in;
7674 /*
7675 * skip the trailing blanks
7676 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007677 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007678 while ((in < end) && (*in != limit) &&
7679 ((*in == 0x20) || (*in == 0x9) ||
7680 (*in == 0xA) || (*in == 0xD))) {
7681 in++;
7682 if (in >= end) {
7683 const xmlChar *oldbase = ctxt->input->base;
7684 GROW;
7685 if (oldbase != ctxt->input->base) {
7686 long delta = ctxt->input->base - oldbase;
7687 start = start + delta;
7688 in = in + delta;
7689 last = last + delta;
7690 }
7691 end = ctxt->input->end;
7692 }
7693 }
7694 if (*in != limit) goto need_complex;
7695 } else {
7696 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7697 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7698 in++;
7699 if (in >= end) {
7700 const xmlChar *oldbase = ctxt->input->base;
7701 GROW;
7702 if (oldbase != ctxt->input->base) {
7703 long delta = ctxt->input->base - oldbase;
7704 start = start + delta;
7705 in = in + delta;
7706 }
7707 end = ctxt->input->end;
7708 }
7709 }
7710 last = in;
7711 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007713 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007715 *len = last - start;
7716 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007718 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007719 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 }
7721 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007722 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007724need_complex:
7725 if (alloc) *alloc = 1;
7726 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727}
7728
7729/**
7730 * xmlParseAttribute2:
7731 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007732 * @pref: the element prefix
7733 * @elem: the element name
7734 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007736 * @len: an int * to save the length of the attribute
7737 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007738 *
7739 * parse an attribute in the new SAX2 framework.
7740 *
7741 * Returns the attribute name, and the value in *value, .
7742 */
7743
7744static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007745xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7746 const xmlChar *pref, const xmlChar *elem,
7747 const xmlChar **prefix, xmlChar **value,
7748 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007750 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007751 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007752
7753 *value = NULL;
7754 GROW;
7755 name = xmlParseQName(ctxt, prefix);
7756 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7758 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759 return(NULL);
7760 }
7761
7762 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007763 * get the type if needed
7764 */
7765 if (ctxt->attsSpecial != NULL) {
7766 int type;
7767
7768 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7769 pref, elem, *prefix, name);
7770 if (type != 0) normalize = 1;
7771 }
7772
7773 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 * read the value
7775 */
7776 SKIP_BLANKS;
7777 if (RAW == '=') {
7778 NEXT;
7779 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007780 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 ctxt->instate = XML_PARSER_CONTENT;
7782 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007783 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 return(NULL);
7786 }
7787
Daniel Veillardd8925572005-06-08 22:34:55 +00007788 if (*prefix == ctxt->str_xml) {
7789 /*
7790 * Check that xml:lang conforms to the specification
7791 * No more registered as an error, just generate a warning now
7792 * since this was deprecated in XML second edition
7793 */
7794 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7795 internal_val = xmlStrndup(val, *len);
7796 if (!xmlCheckLanguageID(internal_val)) {
7797 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7798 "Malformed value for xml:lang : %s\n",
7799 internal_val, NULL);
7800 }
7801 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007802
Daniel Veillardd8925572005-06-08 22:34:55 +00007803 /*
7804 * Check that xml:space conforms to the specification
7805 */
7806 if (xmlStrEqual(name, BAD_CAST "space")) {
7807 internal_val = xmlStrndup(val, *len);
7808 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7809 *(ctxt->space) = 0;
7810 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7811 *(ctxt->space) = 1;
7812 else {
7813 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007814"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007815 internal_val, NULL);
7816 }
7817 }
7818 if (internal_val) {
7819 xmlFree(internal_val);
7820 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822
7823 *value = val;
7824 return(name);
7825}
7826
7827/**
7828 * xmlParseStartTag2:
7829 * @ctxt: an XML parser context
7830 *
7831 * parse a start of tag either for rule element or
7832 * EmptyElement. In both case we don't parse the tag closing chars.
7833 * This routine is called when running SAX2 parsing
7834 *
7835 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7836 *
7837 * [ WFC: Unique Att Spec ]
7838 * No attribute name may appear more than once in the same start-tag or
7839 * empty-element tag.
7840 *
7841 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7842 *
7843 * [ WFC: Unique Att Spec ]
7844 * No attribute name may appear more than once in the same start-tag or
7845 * empty-element tag.
7846 *
7847 * With namespace:
7848 *
7849 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7850 *
7851 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7852 *
7853 * Returns the element name parsed
7854 */
7855
7856static const xmlChar *
7857xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007858 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 const xmlChar *localname;
7860 const xmlChar *prefix;
7861 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007862 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 const xmlChar *nsname;
7864 xmlChar *attvalue;
7865 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 int nratts, nbatts, nbdef;
7868 int i, j, nbNs, attval;
7869 const xmlChar *base;
7870 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007871 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872
7873 if (RAW != '<') return(NULL);
7874 NEXT1;
7875
7876 /*
7877 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7878 * point since the attribute values may be stored as pointers to
7879 * the buffer and calling SHRINK would destroy them !
7880 * The Shrinking is only possible once the full set of attribute
7881 * callbacks have been done.
7882 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007884 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007885 base = ctxt->input->base;
7886 cur = ctxt->input->cur - ctxt->input->base;
7887 nbatts = 0;
7888 nratts = 0;
7889 nbdef = 0;
7890 nbNs = 0;
7891 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007892 /* Forget any namespaces added during an earlier parse of this element. */
7893 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007894
7895 localname = xmlParseQName(ctxt, &prefix);
7896 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007897 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7898 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007899 return(NULL);
7900 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007901 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902
7903 /*
7904 * Now parse the attributes, it ends up with the ending
7905 *
7906 * (S Attribute)* S?
7907 */
7908 SKIP_BLANKS;
7909 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007910 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007911
7912 while ((RAW != '>') &&
7913 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007914 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007915 const xmlChar *q = CUR_PTR;
7916 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007917 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007918
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007919 attname = xmlParseAttribute2(ctxt, prefix, localname,
7920 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007921 if ((attname != NULL) && (attvalue != NULL)) {
7922 if (len < 0) len = xmlStrlen(attvalue);
7923 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007924 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7925 xmlURIPtr uri;
7926
7927 if (*URL != 0) {
7928 uri = xmlParseURI((const char *) URL);
7929 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007930 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7931 "xmlns: %s not a valid URI\n",
7932 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007933 } else {
7934 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007935 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7936 "xmlns: URI %s is not absolute\n",
7937 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007938 }
7939 xmlFreeURI(uri);
7940 }
7941 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007943 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007944 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007945 for (j = 1;j <= nbNs;j++)
7946 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7947 break;
7948 if (j <= nbNs)
7949 xmlErrAttributeDup(ctxt, NULL, attname);
7950 else
7951 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007952 if (alloc != 0) xmlFree(attvalue);
7953 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 continue;
7955 }
7956 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007957 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7958 xmlURIPtr uri;
7959
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007960 if (attname == ctxt->str_xml) {
7961 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007962 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7963 "xml namespace prefix mapped to wrong URI\n",
7964 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007965 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007966 /*
7967 * Do not keep a namespace definition node
7968 */
7969 if (alloc != 0) xmlFree(attvalue);
7970 SKIP_BLANKS;
7971 continue;
7972 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007973 uri = xmlParseURI((const char *) URL);
7974 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007975 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7976 "xmlns:%s: '%s' is not a valid URI\n",
7977 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007978 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007979 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007980 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7981 "xmlns:%s: URI %s is not absolute\n",
7982 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007983 }
7984 xmlFreeURI(uri);
7985 }
7986
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007988 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007989 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007990 for (j = 1;j <= nbNs;j++)
7991 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7992 break;
7993 if (j <= nbNs)
7994 xmlErrAttributeDup(ctxt, aprefix, attname);
7995 else
7996 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007997 if (alloc != 0) xmlFree(attvalue);
7998 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007999 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008000 continue;
8001 }
8002
8003 /*
8004 * Add the pair to atts
8005 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008006 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8007 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008008 if (attvalue[len] == 0)
8009 xmlFree(attvalue);
8010 goto failed;
8011 }
8012 maxatts = ctxt->maxatts;
8013 atts = ctxt->atts;
8014 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008015 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 atts[nbatts++] = attname;
8017 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008018 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008019 atts[nbatts++] = attvalue;
8020 attvalue += len;
8021 atts[nbatts++] = attvalue;
8022 /*
8023 * tag if some deallocation is needed
8024 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008025 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008026 } else {
8027 if ((attvalue != NULL) && (attvalue[len] == 0))
8028 xmlFree(attvalue);
8029 }
8030
8031failed:
8032
8033 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008034 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008035 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8036 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008037 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008038 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8039 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008040 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008041 }
8042 SKIP_BLANKS;
8043 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8044 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008045 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008046 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008047 break;
8048 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008050 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 }
8052
Daniel Veillard0fb18932003-09-07 09:14:37 +00008053 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008054 * The attributes defaulting
8055 */
8056 if (ctxt->attsDefault != NULL) {
8057 xmlDefAttrsPtr defaults;
8058
8059 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8060 if (defaults != NULL) {
8061 for (i = 0;i < defaults->nbAttrs;i++) {
8062 attname = defaults->values[4 * i];
8063 aprefix = defaults->values[4 * i + 1];
8064
8065 /*
8066 * special work for namespaces defaulted defs
8067 */
8068 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8069 /*
8070 * check that it's not a defined namespace
8071 */
8072 for (j = 1;j <= nbNs;j++)
8073 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8074 break;
8075 if (j <= nbNs) continue;
8076
8077 nsname = xmlGetNamespace(ctxt, NULL);
8078 if (nsname != defaults->values[4 * i + 2]) {
8079 if (nsPush(ctxt, NULL,
8080 defaults->values[4 * i + 2]) > 0)
8081 nbNs++;
8082 }
8083 } else if (aprefix == ctxt->str_xmlns) {
8084 /*
8085 * check that it's not a defined namespace
8086 */
8087 for (j = 1;j <= nbNs;j++)
8088 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8089 break;
8090 if (j <= nbNs) continue;
8091
8092 nsname = xmlGetNamespace(ctxt, attname);
8093 if (nsname != defaults->values[2]) {
8094 if (nsPush(ctxt, attname,
8095 defaults->values[4 * i + 2]) > 0)
8096 nbNs++;
8097 }
8098 } else {
8099 /*
8100 * check that it's not a defined attribute
8101 */
8102 for (j = 0;j < nbatts;j+=5) {
8103 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8104 break;
8105 }
8106 if (j < nbatts) continue;
8107
8108 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8109 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008110 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008111 }
8112 maxatts = ctxt->maxatts;
8113 atts = ctxt->atts;
8114 }
8115 atts[nbatts++] = attname;
8116 atts[nbatts++] = aprefix;
8117 if (aprefix == NULL)
8118 atts[nbatts++] = NULL;
8119 else
8120 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8121 atts[nbatts++] = defaults->values[4 * i + 2];
8122 atts[nbatts++] = defaults->values[4 * i + 3];
8123 nbdef++;
8124 }
8125 }
8126 }
8127 }
8128
Daniel Veillarde70c8772003-11-25 07:21:18 +00008129 /*
8130 * The attributes checkings
8131 */
8132 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008133 /*
8134 * The default namespace does not apply to attribute names.
8135 */
8136 if (atts[i + 1] != NULL) {
8137 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8138 if (nsname == NULL) {
8139 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8140 "Namespace prefix %s for %s on %s is not defined\n",
8141 atts[i + 1], atts[i], localname);
8142 }
8143 atts[i + 2] = nsname;
8144 } else
8145 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008146 /*
8147 * [ WFC: Unique Att Spec ]
8148 * No attribute name may appear more than once in the same
8149 * start-tag or empty-element tag.
8150 * As extended by the Namespace in XML REC.
8151 */
8152 for (j = 0; j < i;j += 5) {
8153 if (atts[i] == atts[j]) {
8154 if (atts[i+1] == atts[j+1]) {
8155 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8156 break;
8157 }
8158 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8159 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8160 "Namespaced Attribute %s in '%s' redefined\n",
8161 atts[i], nsname, NULL);
8162 break;
8163 }
8164 }
8165 }
8166 }
8167
Daniel Veillarde57ec792003-09-10 10:50:59 +00008168 nsname = xmlGetNamespace(ctxt, prefix);
8169 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008170 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8171 "Namespace prefix %s on %s is not defined\n",
8172 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008173 }
8174 *pref = prefix;
8175 *URI = nsname;
8176
8177 /*
8178 * SAX: Start of Element !
8179 */
8180 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8181 (!ctxt->disableSAX)) {
8182 if (nbNs > 0)
8183 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8184 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8185 nbatts / 5, nbdef, atts);
8186 else
8187 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8188 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8189 }
8190
8191 /*
8192 * Free up attribute allocated strings if needed
8193 */
8194 if (attval != 0) {
8195 for (i = 3,j = 0; j < nratts;i += 5,j++)
8196 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8197 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008198 }
8199
8200 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008201
8202base_changed:
8203 /*
8204 * the attribute strings are valid iif the base didn't changed
8205 */
8206 if (attval != 0) {
8207 for (i = 3,j = 0; j < nratts;i += 5,j++)
8208 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8209 xmlFree((xmlChar *) atts[i]);
8210 }
8211 ctxt->input->cur = ctxt->input->base + cur;
8212 if (ctxt->wellFormed == 1) {
8213 goto reparse;
8214 }
8215 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008216}
8217
8218/**
8219 * xmlParseEndTag2:
8220 * @ctxt: an XML parser context
8221 * @line: line of the start tag
8222 * @nsNr: number of namespaces on the start tag
8223 *
8224 * parse an end of tag
8225 *
8226 * [42] ETag ::= '</' Name S? '>'
8227 *
8228 * With namespace
8229 *
8230 * [NS 9] ETag ::= '</' QName S? '>'
8231 */
8232
8233static void
8234xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008235 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008236 const xmlChar *name;
8237
8238 GROW;
8239 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008240 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008241 return;
8242 }
8243 SKIP(2);
8244
William M. Brack13dfa872004-09-18 04:52:08 +00008245 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008246 if (ctxt->input->cur[tlen] == '>') {
8247 ctxt->input->cur += tlen + 1;
8248 goto done;
8249 }
8250 ctxt->input->cur += tlen;
8251 name = (xmlChar*)1;
8252 } else {
8253 if (prefix == NULL)
8254 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8255 else
8256 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8257 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008258
8259 /*
8260 * We should definitely be at the ending "S? '>'" part
8261 */
8262 GROW;
8263 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008264 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008265 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008266 } else
8267 NEXT1;
8268
8269 /*
8270 * [ WFC: Element Type Match ]
8271 * The Name in an element's end-tag must match the element type in the
8272 * start-tag.
8273 *
8274 */
8275 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008276 if (name == NULL) name = BAD_CAST "unparseable";
8277 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008279 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008280 }
8281
8282 /*
8283 * SAX: End of Tag
8284 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008285done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008286 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8287 (!ctxt->disableSAX))
8288 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8289
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 spacePop(ctxt);
8291 if (nsNr != 0)
8292 nsPop(ctxt, nsNr);
8293 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008294}
8295
8296/**
Owen Taylor3473f882001-02-23 17:55:21 +00008297 * xmlParseCDSect:
8298 * @ctxt: an XML parser context
8299 *
8300 * Parse escaped pure raw content.
8301 *
8302 * [18] CDSect ::= CDStart CData CDEnd
8303 *
8304 * [19] CDStart ::= '<![CDATA['
8305 *
8306 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8307 *
8308 * [21] CDEnd ::= ']]>'
8309 */
8310void
8311xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8312 xmlChar *buf = NULL;
8313 int len = 0;
8314 int size = XML_PARSER_BUFFER_SIZE;
8315 int r, rl;
8316 int s, sl;
8317 int cur, l;
8318 int count = 0;
8319
Daniel Veillard8f597c32003-10-06 08:19:27 +00008320 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008321 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008322 SKIP(9);
8323 } else
8324 return;
8325
8326 ctxt->instate = XML_PARSER_CDATA_SECTION;
8327 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008328 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008329 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008330 ctxt->instate = XML_PARSER_CONTENT;
8331 return;
8332 }
8333 NEXTL(rl);
8334 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008335 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008336 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008337 ctxt->instate = XML_PARSER_CONTENT;
8338 return;
8339 }
8340 NEXTL(sl);
8341 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008342 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008343 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008344 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008345 return;
8346 }
William M. Brack871611b2003-10-18 04:53:14 +00008347 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008348 ((r != ']') || (s != ']') || (cur != '>'))) {
8349 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008350 xmlChar *tmp;
8351
Owen Taylor3473f882001-02-23 17:55:21 +00008352 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008353 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8354 if (tmp == NULL) {
8355 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008356 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008357 return;
8358 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008359 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008360 }
8361 COPY_BUF(rl,buf,len,r);
8362 r = s;
8363 rl = sl;
8364 s = cur;
8365 sl = l;
8366 count++;
8367 if (count > 50) {
8368 GROW;
8369 count = 0;
8370 }
8371 NEXTL(l);
8372 cur = CUR_CHAR(l);
8373 }
8374 buf[len] = 0;
8375 ctxt->instate = XML_PARSER_CONTENT;
8376 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008377 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008378 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008379 xmlFree(buf);
8380 return;
8381 }
8382 NEXTL(l);
8383
8384 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008385 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008386 */
8387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8388 if (ctxt->sax->cdataBlock != NULL)
8389 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008390 else if (ctxt->sax->characters != NULL)
8391 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008392 }
8393 xmlFree(buf);
8394}
8395
8396/**
8397 * xmlParseContent:
8398 * @ctxt: an XML parser context
8399 *
8400 * Parse a content:
8401 *
8402 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8403 */
8404
8405void
8406xmlParseContent(xmlParserCtxtPtr ctxt) {
8407 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008408 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008409 ((RAW != '<') || (NXT(1) != '/')) &&
8410 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008411 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008412 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008413 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008414
8415 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008416 * First case : a Processing Instruction.
8417 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008418 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008419 xmlParsePI(ctxt);
8420 }
8421
8422 /*
8423 * Second case : a CDSection
8424 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008425 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008426 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008427 xmlParseCDSect(ctxt);
8428 }
8429
8430 /*
8431 * Third case : a comment
8432 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008433 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008434 (NXT(2) == '-') && (NXT(3) == '-')) {
8435 xmlParseComment(ctxt);
8436 ctxt->instate = XML_PARSER_CONTENT;
8437 }
8438
8439 /*
8440 * Fourth case : a sub-element.
8441 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008442 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008443 xmlParseElement(ctxt);
8444 }
8445
8446 /*
8447 * Fifth case : a reference. If if has not been resolved,
8448 * parsing returns it's Name, create the node
8449 */
8450
Daniel Veillard21a0f912001-02-25 19:54:14 +00008451 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008452 xmlParseReference(ctxt);
8453 }
8454
8455 /*
8456 * Last case, text. Note that References are handled directly.
8457 */
8458 else {
8459 xmlParseCharData(ctxt, 0);
8460 }
8461
8462 GROW;
8463 /*
8464 * Pop-up of finished entities.
8465 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008466 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008467 xmlPopInput(ctxt);
8468 SHRINK;
8469
Daniel Veillardfdc91562002-07-01 21:52:03 +00008470 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008471 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8472 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008473 ctxt->instate = XML_PARSER_EOF;
8474 break;
8475 }
8476 }
8477}
8478
8479/**
8480 * xmlParseElement:
8481 * @ctxt: an XML parser context
8482 *
8483 * parse an XML element, this is highly recursive
8484 *
8485 * [39] element ::= EmptyElemTag | STag content ETag
8486 *
8487 * [ WFC: Element Type Match ]
8488 * The Name in an element's end-tag must match the element type in the
8489 * start-tag.
8490 *
Owen Taylor3473f882001-02-23 17:55:21 +00008491 */
8492
8493void
8494xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008495 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008496 const xmlChar *prefix;
8497 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008498 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008499 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008500 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008501 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008502
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008503 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8504 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8505 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8506 xmlParserMaxDepth);
8507 ctxt->instate = XML_PARSER_EOF;
8508 return;
8509 }
8510
Owen Taylor3473f882001-02-23 17:55:21 +00008511 /* Capture start position */
8512 if (ctxt->record_info) {
8513 node_info.begin_pos = ctxt->input->consumed +
8514 (CUR_PTR - ctxt->input->base);
8515 node_info.begin_line = ctxt->input->line;
8516 }
8517
8518 if (ctxt->spaceNr == 0)
8519 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008520 else if (*ctxt->space == -2)
8521 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008522 else
8523 spacePush(ctxt, *ctxt->space);
8524
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008525 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008526#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008527 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008528#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008529 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008530#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008531 else
8532 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008533#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008534 if (name == NULL) {
8535 spacePop(ctxt);
8536 return;
8537 }
8538 namePush(ctxt, name);
8539 ret = ctxt->node;
8540
Daniel Veillard4432df22003-09-28 18:58:27 +00008541#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008542 /*
8543 * [ VC: Root Element Type ]
8544 * The Name in the document type declaration must match the element
8545 * type of the root element.
8546 */
8547 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8548 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8549 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008550#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008551
8552 /*
8553 * Check for an Empty Element.
8554 */
8555 if ((RAW == '/') && (NXT(1) == '>')) {
8556 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008557 if (ctxt->sax2) {
8558 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8559 (!ctxt->disableSAX))
8560 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008561#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562 } else {
8563 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8564 (!ctxt->disableSAX))
8565 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008566#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008567 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008568 namePop(ctxt);
8569 spacePop(ctxt);
8570 if (nsNr != ctxt->nsNr)
8571 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008572 if ( ret != NULL && ctxt->record_info ) {
8573 node_info.end_pos = ctxt->input->consumed +
8574 (CUR_PTR - ctxt->input->base);
8575 node_info.end_line = ctxt->input->line;
8576 node_info.node = ret;
8577 xmlParserAddNodeInfo(ctxt, &node_info);
8578 }
8579 return;
8580 }
8581 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008582 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008583 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008584 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8585 "Couldn't find end of Start Tag %s line %d\n",
8586 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008587
8588 /*
8589 * end of parsing of this node.
8590 */
8591 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008592 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008593 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008594 if (nsNr != ctxt->nsNr)
8595 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008596
8597 /*
8598 * Capture end position and add node
8599 */
8600 if ( ret != NULL && ctxt->record_info ) {
8601 node_info.end_pos = ctxt->input->consumed +
8602 (CUR_PTR - ctxt->input->base);
8603 node_info.end_line = ctxt->input->line;
8604 node_info.node = ret;
8605 xmlParserAddNodeInfo(ctxt, &node_info);
8606 }
8607 return;
8608 }
8609
8610 /*
8611 * Parse the content of the element:
8612 */
8613 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008614 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008615 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008616 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008617 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008618
8619 /*
8620 * end of parsing of this node.
8621 */
8622 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008623 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008624 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008625 if (nsNr != ctxt->nsNr)
8626 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 return;
8628 }
8629
8630 /*
8631 * parse the end of tag: '</' should be here.
8632 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008633 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008634 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008635 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008636 }
8637#ifdef LIBXML_SAX1_ENABLED
8638 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008639 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008640#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008641
8642 /*
8643 * Capture end position and add node
8644 */
8645 if ( ret != NULL && ctxt->record_info ) {
8646 node_info.end_pos = ctxt->input->consumed +
8647 (CUR_PTR - ctxt->input->base);
8648 node_info.end_line = ctxt->input->line;
8649 node_info.node = ret;
8650 xmlParserAddNodeInfo(ctxt, &node_info);
8651 }
8652}
8653
8654/**
8655 * xmlParseVersionNum:
8656 * @ctxt: an XML parser context
8657 *
8658 * parse the XML version value.
8659 *
8660 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8661 *
8662 * Returns the string giving the XML version number, or NULL
8663 */
8664xmlChar *
8665xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8666 xmlChar *buf = NULL;
8667 int len = 0;
8668 int size = 10;
8669 xmlChar cur;
8670
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008671 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008672 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008674 return(NULL);
8675 }
8676 cur = CUR;
8677 while (((cur >= 'a') && (cur <= 'z')) ||
8678 ((cur >= 'A') && (cur <= 'Z')) ||
8679 ((cur >= '0') && (cur <= '9')) ||
8680 (cur == '_') || (cur == '.') ||
8681 (cur == ':') || (cur == '-')) {
8682 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008683 xmlChar *tmp;
8684
Owen Taylor3473f882001-02-23 17:55:21 +00008685 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008686 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8687 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008688 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008689 return(NULL);
8690 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008691 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008692 }
8693 buf[len++] = cur;
8694 NEXT;
8695 cur=CUR;
8696 }
8697 buf[len] = 0;
8698 return(buf);
8699}
8700
8701/**
8702 * xmlParseVersionInfo:
8703 * @ctxt: an XML parser context
8704 *
8705 * parse the XML version.
8706 *
8707 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8708 *
8709 * [25] Eq ::= S? '=' S?
8710 *
8711 * Returns the version string, e.g. "1.0"
8712 */
8713
8714xmlChar *
8715xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8716 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008717
Daniel Veillarda07050d2003-10-19 14:46:32 +00008718 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008719 SKIP(7);
8720 SKIP_BLANKS;
8721 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008722 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008723 return(NULL);
8724 }
8725 NEXT;
8726 SKIP_BLANKS;
8727 if (RAW == '"') {
8728 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008729 version = xmlParseVersionNum(ctxt);
8730 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008731 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008732 } else
8733 NEXT;
8734 } else if (RAW == '\''){
8735 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008736 version = xmlParseVersionNum(ctxt);
8737 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008738 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008739 } else
8740 NEXT;
8741 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008742 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008743 }
8744 }
8745 return(version);
8746}
8747
8748/**
8749 * xmlParseEncName:
8750 * @ctxt: an XML parser context
8751 *
8752 * parse the XML encoding name
8753 *
8754 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8755 *
8756 * Returns the encoding name value or NULL
8757 */
8758xmlChar *
8759xmlParseEncName(xmlParserCtxtPtr ctxt) {
8760 xmlChar *buf = NULL;
8761 int len = 0;
8762 int size = 10;
8763 xmlChar cur;
8764
8765 cur = CUR;
8766 if (((cur >= 'a') && (cur <= 'z')) ||
8767 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008768 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008769 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008770 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008771 return(NULL);
8772 }
8773
8774 buf[len++] = cur;
8775 NEXT;
8776 cur = CUR;
8777 while (((cur >= 'a') && (cur <= 'z')) ||
8778 ((cur >= 'A') && (cur <= 'Z')) ||
8779 ((cur >= '0') && (cur <= '9')) ||
8780 (cur == '.') || (cur == '_') ||
8781 (cur == '-')) {
8782 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008783 xmlChar *tmp;
8784
Owen Taylor3473f882001-02-23 17:55:21 +00008785 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008786 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8787 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008788 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008789 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008790 return(NULL);
8791 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008792 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008793 }
8794 buf[len++] = cur;
8795 NEXT;
8796 cur = CUR;
8797 if (cur == 0) {
8798 SHRINK;
8799 GROW;
8800 cur = CUR;
8801 }
8802 }
8803 buf[len] = 0;
8804 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008805 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008806 }
8807 return(buf);
8808}
8809
8810/**
8811 * xmlParseEncodingDecl:
8812 * @ctxt: an XML parser context
8813 *
8814 * parse the XML encoding declaration
8815 *
8816 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8817 *
8818 * this setups the conversion filters.
8819 *
8820 * Returns the encoding value or NULL
8821 */
8822
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008823const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008824xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8825 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008826
8827 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008828 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008829 SKIP(8);
8830 SKIP_BLANKS;
8831 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008832 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008833 return(NULL);
8834 }
8835 NEXT;
8836 SKIP_BLANKS;
8837 if (RAW == '"') {
8838 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008839 encoding = xmlParseEncName(ctxt);
8840 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008841 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008842 } else
8843 NEXT;
8844 } else if (RAW == '\''){
8845 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008846 encoding = xmlParseEncName(ctxt);
8847 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008848 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008849 } else
8850 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008851 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008852 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008853 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008854 /*
8855 * UTF-16 encoding stwich has already taken place at this stage,
8856 * more over the little-endian/big-endian selection is already done
8857 */
8858 if ((encoding != NULL) &&
8859 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8860 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008861 if (ctxt->encoding != NULL)
8862 xmlFree((xmlChar *) ctxt->encoding);
8863 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008864 }
8865 /*
8866 * UTF-8 encoding is handled natively
8867 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008868 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008869 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8870 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008871 if (ctxt->encoding != NULL)
8872 xmlFree((xmlChar *) ctxt->encoding);
8873 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008874 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008875 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008876 xmlCharEncodingHandlerPtr handler;
8877
8878 if (ctxt->input->encoding != NULL)
8879 xmlFree((xmlChar *) ctxt->input->encoding);
8880 ctxt->input->encoding = encoding;
8881
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008882 handler = xmlFindCharEncodingHandler((const char *) encoding);
8883 if (handler != NULL) {
8884 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008885 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008886 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008887 "Unsupported encoding %s\n", encoding);
8888 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008889 }
8890 }
8891 }
8892 return(encoding);
8893}
8894
8895/**
8896 * xmlParseSDDecl:
8897 * @ctxt: an XML parser context
8898 *
8899 * parse the XML standalone declaration
8900 *
8901 * [32] SDDecl ::= S 'standalone' Eq
8902 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8903 *
8904 * [ VC: Standalone Document Declaration ]
8905 * TODO The standalone document declaration must have the value "no"
8906 * if any external markup declarations contain declarations of:
8907 * - attributes with default values, if elements to which these
8908 * attributes apply appear in the document without specifications
8909 * of values for these attributes, or
8910 * - entities (other than amp, lt, gt, apos, quot), if references
8911 * to those entities appear in the document, or
8912 * - attributes with values subject to normalization, where the
8913 * attribute appears in the document with a value which will change
8914 * as a result of normalization, or
8915 * - element types with element content, if white space occurs directly
8916 * within any instance of those types.
8917 *
8918 * Returns 1 if standalone, 0 otherwise
8919 */
8920
8921int
8922xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8923 int standalone = -1;
8924
8925 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008926 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008927 SKIP(10);
8928 SKIP_BLANKS;
8929 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008930 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008931 return(standalone);
8932 }
8933 NEXT;
8934 SKIP_BLANKS;
8935 if (RAW == '\''){
8936 NEXT;
8937 if ((RAW == 'n') && (NXT(1) == 'o')) {
8938 standalone = 0;
8939 SKIP(2);
8940 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8941 (NXT(2) == 's')) {
8942 standalone = 1;
8943 SKIP(3);
8944 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008945 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008946 }
8947 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008948 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008949 } else
8950 NEXT;
8951 } else if (RAW == '"'){
8952 NEXT;
8953 if ((RAW == 'n') && (NXT(1) == 'o')) {
8954 standalone = 0;
8955 SKIP(2);
8956 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8957 (NXT(2) == 's')) {
8958 standalone = 1;
8959 SKIP(3);
8960 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008961 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008962 }
8963 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008964 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008965 } else
8966 NEXT;
8967 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008968 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008969 }
8970 }
8971 return(standalone);
8972}
8973
8974/**
8975 * xmlParseXMLDecl:
8976 * @ctxt: an XML parser context
8977 *
8978 * parse an XML declaration header
8979 *
8980 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8981 */
8982
8983void
8984xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8985 xmlChar *version;
8986
8987 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008988 * This value for standalone indicates that the document has an
8989 * XML declaration but it does not have a standalone attribute.
8990 * It will be overwritten later if a standalone attribute is found.
8991 */
8992 ctxt->input->standalone = -2;
8993
8994 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008995 * We know that '<?xml' is here.
8996 */
8997 SKIP(5);
8998
William M. Brack76e95df2003-10-18 16:20:14 +00008999 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9001 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009002 }
9003 SKIP_BLANKS;
9004
9005 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009006 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009007 */
9008 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009009 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009010 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009011 } else {
9012 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9013 /*
9014 * TODO: Blueberry should be detected here
9015 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009016 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9017 "Unsupported version '%s'\n",
9018 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009019 }
9020 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009021 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009022 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009023 }
Owen Taylor3473f882001-02-23 17:55:21 +00009024
9025 /*
9026 * We may have the encoding declaration
9027 */
William M. Brack76e95df2003-10-18 16:20:14 +00009028 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009029 if ((RAW == '?') && (NXT(1) == '>')) {
9030 SKIP(2);
9031 return;
9032 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009033 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009034 }
9035 xmlParseEncodingDecl(ctxt);
9036 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9037 /*
9038 * The XML REC instructs us to stop parsing right here
9039 */
9040 return;
9041 }
9042
9043 /*
9044 * We may have the standalone status.
9045 */
William M. Brack76e95df2003-10-18 16:20:14 +00009046 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009047 if ((RAW == '?') && (NXT(1) == '>')) {
9048 SKIP(2);
9049 return;
9050 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009051 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009052 }
9053 SKIP_BLANKS;
9054 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9055
9056 SKIP_BLANKS;
9057 if ((RAW == '?') && (NXT(1) == '>')) {
9058 SKIP(2);
9059 } else if (RAW == '>') {
9060 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009061 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009062 NEXT;
9063 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009064 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009065 MOVETO_ENDTAG(CUR_PTR);
9066 NEXT;
9067 }
9068}
9069
9070/**
9071 * xmlParseMisc:
9072 * @ctxt: an XML parser context
9073 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009074 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009075 *
9076 * [27] Misc ::= Comment | PI | S
9077 */
9078
9079void
9080xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009081 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009082 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009083 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009084 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009085 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009086 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009087 NEXT;
9088 } else
9089 xmlParseComment(ctxt);
9090 }
9091}
9092
9093/**
9094 * xmlParseDocument:
9095 * @ctxt: an XML parser context
9096 *
9097 * parse an XML document (and build a tree if using the standard SAX
9098 * interface).
9099 *
9100 * [1] document ::= prolog element Misc*
9101 *
9102 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9103 *
9104 * Returns 0, -1 in case of error. the parser context is augmented
9105 * as a result of the parsing.
9106 */
9107
9108int
9109xmlParseDocument(xmlParserCtxtPtr ctxt) {
9110 xmlChar start[4];
9111 xmlCharEncoding enc;
9112
9113 xmlInitParser();
9114
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009115 if ((ctxt == NULL) || (ctxt->input == NULL))
9116 return(-1);
9117
Owen Taylor3473f882001-02-23 17:55:21 +00009118 GROW;
9119
9120 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009121 * SAX: detecting the level.
9122 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009123 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124
9125 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009126 * SAX: beginning of the document processing.
9127 */
9128 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9129 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9130
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009131 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9132 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009133 /*
9134 * Get the 4 first bytes and decode the charset
9135 * if enc != XML_CHAR_ENCODING_NONE
9136 * plug some encoding conversion routines.
9137 */
9138 start[0] = RAW;
9139 start[1] = NXT(1);
9140 start[2] = NXT(2);
9141 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009142 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009143 if (enc != XML_CHAR_ENCODING_NONE) {
9144 xmlSwitchEncoding(ctxt, enc);
9145 }
Owen Taylor3473f882001-02-23 17:55:21 +00009146 }
9147
9148
9149 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009150 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009151 }
9152
9153 /*
9154 * Check for the XMLDecl in the Prolog.
9155 */
9156 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009157 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009158
9159 /*
9160 * Note that we will switch encoding on the fly.
9161 */
9162 xmlParseXMLDecl(ctxt);
9163 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9164 /*
9165 * The XML REC instructs us to stop parsing right here
9166 */
9167 return(-1);
9168 }
9169 ctxt->standalone = ctxt->input->standalone;
9170 SKIP_BLANKS;
9171 } else {
9172 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9173 }
9174 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9175 ctxt->sax->startDocument(ctxt->userData);
9176
9177 /*
9178 * The Misc part of the Prolog
9179 */
9180 GROW;
9181 xmlParseMisc(ctxt);
9182
9183 /*
9184 * Then possibly doc type declaration(s) and more Misc
9185 * (doctypedecl Misc*)?
9186 */
9187 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009188 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009189
9190 ctxt->inSubset = 1;
9191 xmlParseDocTypeDecl(ctxt);
9192 if (RAW == '[') {
9193 ctxt->instate = XML_PARSER_DTD;
9194 xmlParseInternalSubset(ctxt);
9195 }
9196
9197 /*
9198 * Create and update the external subset.
9199 */
9200 ctxt->inSubset = 2;
9201 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9202 (!ctxt->disableSAX))
9203 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9204 ctxt->extSubSystem, ctxt->extSubURI);
9205 ctxt->inSubset = 0;
9206
9207
9208 ctxt->instate = XML_PARSER_PROLOG;
9209 xmlParseMisc(ctxt);
9210 }
9211
9212 /*
9213 * Time to start parsing the tree itself
9214 */
9215 GROW;
9216 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009217 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9218 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009219 } else {
9220 ctxt->instate = XML_PARSER_CONTENT;
9221 xmlParseElement(ctxt);
9222 ctxt->instate = XML_PARSER_EPILOG;
9223
9224
9225 /*
9226 * The Misc part at the end
9227 */
9228 xmlParseMisc(ctxt);
9229
Daniel Veillard561b7f82002-03-20 21:55:57 +00009230 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009231 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009232 }
9233 ctxt->instate = XML_PARSER_EOF;
9234 }
9235
9236 /*
9237 * SAX: end of the document processing.
9238 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009239 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009240 ctxt->sax->endDocument(ctxt->userData);
9241
Daniel Veillard5997aca2002-03-18 18:36:20 +00009242 /*
9243 * Remove locally kept entity definitions if the tree was not built
9244 */
9245 if ((ctxt->myDoc != NULL) &&
9246 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9247 xmlFreeDoc(ctxt->myDoc);
9248 ctxt->myDoc = NULL;
9249 }
9250
Daniel Veillardc7612992002-02-17 22:47:37 +00009251 if (! ctxt->wellFormed) {
9252 ctxt->valid = 0;
9253 return(-1);
9254 }
Owen Taylor3473f882001-02-23 17:55:21 +00009255 return(0);
9256}
9257
9258/**
9259 * xmlParseExtParsedEnt:
9260 * @ctxt: an XML parser context
9261 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009262 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009263 * An external general parsed entity is well-formed if it matches the
9264 * production labeled extParsedEnt.
9265 *
9266 * [78] extParsedEnt ::= TextDecl? content
9267 *
9268 * Returns 0, -1 in case of error. the parser context is augmented
9269 * as a result of the parsing.
9270 */
9271
9272int
9273xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9274 xmlChar start[4];
9275 xmlCharEncoding enc;
9276
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009277 if ((ctxt == NULL) || (ctxt->input == NULL))
9278 return(-1);
9279
Owen Taylor3473f882001-02-23 17:55:21 +00009280 xmlDefaultSAXHandlerInit();
9281
Daniel Veillard309f81d2003-09-23 09:02:53 +00009282 xmlDetectSAX2(ctxt);
9283
Owen Taylor3473f882001-02-23 17:55:21 +00009284 GROW;
9285
9286 /*
9287 * SAX: beginning of the document processing.
9288 */
9289 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9290 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9291
9292 /*
9293 * Get the 4 first bytes and decode the charset
9294 * if enc != XML_CHAR_ENCODING_NONE
9295 * plug some encoding conversion routines.
9296 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009297 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9298 start[0] = RAW;
9299 start[1] = NXT(1);
9300 start[2] = NXT(2);
9301 start[3] = NXT(3);
9302 enc = xmlDetectCharEncoding(start, 4);
9303 if (enc != XML_CHAR_ENCODING_NONE) {
9304 xmlSwitchEncoding(ctxt, enc);
9305 }
Owen Taylor3473f882001-02-23 17:55:21 +00009306 }
9307
9308
9309 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009310 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009311 }
9312
9313 /*
9314 * Check for the XMLDecl in the Prolog.
9315 */
9316 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009317 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009318
9319 /*
9320 * Note that we will switch encoding on the fly.
9321 */
9322 xmlParseXMLDecl(ctxt);
9323 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9324 /*
9325 * The XML REC instructs us to stop parsing right here
9326 */
9327 return(-1);
9328 }
9329 SKIP_BLANKS;
9330 } else {
9331 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9332 }
9333 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9334 ctxt->sax->startDocument(ctxt->userData);
9335
9336 /*
9337 * Doing validity checking on chunk doesn't make sense
9338 */
9339 ctxt->instate = XML_PARSER_CONTENT;
9340 ctxt->validate = 0;
9341 ctxt->loadsubset = 0;
9342 ctxt->depth = 0;
9343
9344 xmlParseContent(ctxt);
9345
9346 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009347 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009348 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009349 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009350 }
9351
9352 /*
9353 * SAX: end of the document processing.
9354 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009356 ctxt->sax->endDocument(ctxt->userData);
9357
9358 if (! ctxt->wellFormed) return(-1);
9359 return(0);
9360}
9361
Daniel Veillard73b013f2003-09-30 12:36:01 +00009362#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009363/************************************************************************
9364 * *
9365 * Progressive parsing interfaces *
9366 * *
9367 ************************************************************************/
9368
9369/**
9370 * xmlParseLookupSequence:
9371 * @ctxt: an XML parser context
9372 * @first: the first char to lookup
9373 * @next: the next char to lookup or zero
9374 * @third: the next char to lookup or zero
9375 *
9376 * Try to find if a sequence (first, next, third) or just (first next) or
9377 * (first) is available in the input stream.
9378 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9379 * to avoid rescanning sequences of bytes, it DOES change the state of the
9380 * parser, do not use liberally.
9381 *
9382 * Returns the index to the current parsing point if the full sequence
9383 * is available, -1 otherwise.
9384 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009385static int
Owen Taylor3473f882001-02-23 17:55:21 +00009386xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9387 xmlChar next, xmlChar third) {
9388 int base, len;
9389 xmlParserInputPtr in;
9390 const xmlChar *buf;
9391
9392 in = ctxt->input;
9393 if (in == NULL) return(-1);
9394 base = in->cur - in->base;
9395 if (base < 0) return(-1);
9396 if (ctxt->checkIndex > base)
9397 base = ctxt->checkIndex;
9398 if (in->buf == NULL) {
9399 buf = in->base;
9400 len = in->length;
9401 } else {
9402 buf = in->buf->buffer->content;
9403 len = in->buf->buffer->use;
9404 }
9405 /* take into account the sequence length */
9406 if (third) len -= 2;
9407 else if (next) len --;
9408 for (;base < len;base++) {
9409 if (buf[base] == first) {
9410 if (third != 0) {
9411 if ((buf[base + 1] != next) ||
9412 (buf[base + 2] != third)) continue;
9413 } else if (next != 0) {
9414 if (buf[base + 1] != next) continue;
9415 }
9416 ctxt->checkIndex = 0;
9417#ifdef DEBUG_PUSH
9418 if (next == 0)
9419 xmlGenericError(xmlGenericErrorContext,
9420 "PP: lookup '%c' found at %d\n",
9421 first, base);
9422 else if (third == 0)
9423 xmlGenericError(xmlGenericErrorContext,
9424 "PP: lookup '%c%c' found at %d\n",
9425 first, next, base);
9426 else
9427 xmlGenericError(xmlGenericErrorContext,
9428 "PP: lookup '%c%c%c' found at %d\n",
9429 first, next, third, base);
9430#endif
9431 return(base - (in->cur - in->base));
9432 }
9433 }
9434 ctxt->checkIndex = base;
9435#ifdef DEBUG_PUSH
9436 if (next == 0)
9437 xmlGenericError(xmlGenericErrorContext,
9438 "PP: lookup '%c' failed\n", first);
9439 else if (third == 0)
9440 xmlGenericError(xmlGenericErrorContext,
9441 "PP: lookup '%c%c' failed\n", first, next);
9442 else
9443 xmlGenericError(xmlGenericErrorContext,
9444 "PP: lookup '%c%c%c' failed\n", first, next, third);
9445#endif
9446 return(-1);
9447}
9448
9449/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009450 * xmlParseGetLasts:
9451 * @ctxt: an XML parser context
9452 * @lastlt: pointer to store the last '<' from the input
9453 * @lastgt: pointer to store the last '>' from the input
9454 *
9455 * Lookup the last < and > in the current chunk
9456 */
9457static void
9458xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9459 const xmlChar **lastgt) {
9460 const xmlChar *tmp;
9461
9462 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9463 xmlGenericError(xmlGenericErrorContext,
9464 "Internal error: xmlParseGetLasts\n");
9465 return;
9466 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009467 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009468 tmp = ctxt->input->end;
9469 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009470 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009471 if (tmp < ctxt->input->base) {
9472 *lastlt = NULL;
9473 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009474 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009475 *lastlt = tmp;
9476 tmp++;
9477 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9478 if (*tmp == '\'') {
9479 tmp++;
9480 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9481 if (tmp < ctxt->input->end) tmp++;
9482 } else if (*tmp == '"') {
9483 tmp++;
9484 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9485 if (tmp < ctxt->input->end) tmp++;
9486 } else
9487 tmp++;
9488 }
9489 if (tmp < ctxt->input->end)
9490 *lastgt = tmp;
9491 else {
9492 tmp = *lastlt;
9493 tmp--;
9494 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9495 if (tmp >= ctxt->input->base)
9496 *lastgt = tmp;
9497 else
9498 *lastgt = NULL;
9499 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009500 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009501 } else {
9502 *lastlt = NULL;
9503 *lastgt = NULL;
9504 }
9505}
9506/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009507 * xmlCheckCdataPush:
9508 * @cur: pointer to the bock of characters
9509 * @len: length of the block in bytes
9510 *
9511 * Check that the block of characters is okay as SCdata content [20]
9512 *
9513 * Returns the number of bytes to pass if okay, a negative index where an
9514 * UTF-8 error occured otherwise
9515 */
9516static int
9517xmlCheckCdataPush(const xmlChar *utf, int len) {
9518 int ix;
9519 unsigned char c;
9520 int codepoint;
9521
9522 if ((utf == NULL) || (len <= 0))
9523 return(0);
9524
9525 for (ix = 0; ix < len;) { /* string is 0-terminated */
9526 c = utf[ix];
9527 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9528 if (c >= 0x20)
9529 ix++;
9530 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9531 ix++;
9532 else
9533 return(-ix);
9534 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9535 if (ix + 2 > len) return(ix);
9536 if ((utf[ix+1] & 0xc0 ) != 0x80)
9537 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009538 codepoint = (utf[ix] & 0x1f) << 6;
9539 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009540 if (!xmlIsCharQ(codepoint))
9541 return(-ix);
9542 ix += 2;
9543 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9544 if (ix + 3 > len) return(ix);
9545 if (((utf[ix+1] & 0xc0) != 0x80) ||
9546 ((utf[ix+2] & 0xc0) != 0x80))
9547 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009548 codepoint = (utf[ix] & 0xf) << 12;
9549 codepoint |= (utf[ix+1] & 0x3f) << 6;
9550 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009551 if (!xmlIsCharQ(codepoint))
9552 return(-ix);
9553 ix += 3;
9554 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9555 if (ix + 4 > len) return(ix);
9556 if (((utf[ix+1] & 0xc0) != 0x80) ||
9557 ((utf[ix+2] & 0xc0) != 0x80) ||
9558 ((utf[ix+3] & 0xc0) != 0x80))
9559 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009560 codepoint = (utf[ix] & 0x7) << 18;
9561 codepoint |= (utf[ix+1] & 0x3f) << 12;
9562 codepoint |= (utf[ix+2] & 0x3f) << 6;
9563 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009564 if (!xmlIsCharQ(codepoint))
9565 return(-ix);
9566 ix += 4;
9567 } else /* unknown encoding */
9568 return(-ix);
9569 }
9570 return(ix);
9571}
9572
9573/**
Owen Taylor3473f882001-02-23 17:55:21 +00009574 * xmlParseTryOrFinish:
9575 * @ctxt: an XML parser context
9576 * @terminate: last chunk indicator
9577 *
9578 * Try to progress on parsing
9579 *
9580 * Returns zero if no parsing was possible
9581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009582static int
Owen Taylor3473f882001-02-23 17:55:21 +00009583xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9584 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009585 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009586 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009588
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009589 if (ctxt->input == NULL)
9590 return(0);
9591
Owen Taylor3473f882001-02-23 17:55:21 +00009592#ifdef DEBUG_PUSH
9593 switch (ctxt->instate) {
9594 case XML_PARSER_EOF:
9595 xmlGenericError(xmlGenericErrorContext,
9596 "PP: try EOF\n"); break;
9597 case XML_PARSER_START:
9598 xmlGenericError(xmlGenericErrorContext,
9599 "PP: try START\n"); break;
9600 case XML_PARSER_MISC:
9601 xmlGenericError(xmlGenericErrorContext,
9602 "PP: try MISC\n");break;
9603 case XML_PARSER_COMMENT:
9604 xmlGenericError(xmlGenericErrorContext,
9605 "PP: try COMMENT\n");break;
9606 case XML_PARSER_PROLOG:
9607 xmlGenericError(xmlGenericErrorContext,
9608 "PP: try PROLOG\n");break;
9609 case XML_PARSER_START_TAG:
9610 xmlGenericError(xmlGenericErrorContext,
9611 "PP: try START_TAG\n");break;
9612 case XML_PARSER_CONTENT:
9613 xmlGenericError(xmlGenericErrorContext,
9614 "PP: try CONTENT\n");break;
9615 case XML_PARSER_CDATA_SECTION:
9616 xmlGenericError(xmlGenericErrorContext,
9617 "PP: try CDATA_SECTION\n");break;
9618 case XML_PARSER_END_TAG:
9619 xmlGenericError(xmlGenericErrorContext,
9620 "PP: try END_TAG\n");break;
9621 case XML_PARSER_ENTITY_DECL:
9622 xmlGenericError(xmlGenericErrorContext,
9623 "PP: try ENTITY_DECL\n");break;
9624 case XML_PARSER_ENTITY_VALUE:
9625 xmlGenericError(xmlGenericErrorContext,
9626 "PP: try ENTITY_VALUE\n");break;
9627 case XML_PARSER_ATTRIBUTE_VALUE:
9628 xmlGenericError(xmlGenericErrorContext,
9629 "PP: try ATTRIBUTE_VALUE\n");break;
9630 case XML_PARSER_DTD:
9631 xmlGenericError(xmlGenericErrorContext,
9632 "PP: try DTD\n");break;
9633 case XML_PARSER_EPILOG:
9634 xmlGenericError(xmlGenericErrorContext,
9635 "PP: try EPILOG\n");break;
9636 case XML_PARSER_PI:
9637 xmlGenericError(xmlGenericErrorContext,
9638 "PP: try PI\n");break;
9639 case XML_PARSER_IGNORE:
9640 xmlGenericError(xmlGenericErrorContext,
9641 "PP: try IGNORE\n");break;
9642 }
9643#endif
9644
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009645 if ((ctxt->input != NULL) &&
9646 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009647 xmlSHRINK(ctxt);
9648 ctxt->checkIndex = 0;
9649 }
9650 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009651
Daniel Veillarda880b122003-04-21 21:36:41 +00009652 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009653 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009654 return(0);
9655
9656
Owen Taylor3473f882001-02-23 17:55:21 +00009657 /*
9658 * Pop-up of finished entities.
9659 */
9660 while ((RAW == 0) && (ctxt->inputNr > 1))
9661 xmlPopInput(ctxt);
9662
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009663 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009664 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009665 avail = ctxt->input->length -
9666 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009667 else {
9668 /*
9669 * If we are operating on converted input, try to flush
9670 * remaining chars to avoid them stalling in the non-converted
9671 * buffer.
9672 */
9673 if ((ctxt->input->buf->raw != NULL) &&
9674 (ctxt->input->buf->raw->use > 0)) {
9675 int base = ctxt->input->base -
9676 ctxt->input->buf->buffer->content;
9677 int current = ctxt->input->cur - ctxt->input->base;
9678
9679 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9680 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9681 ctxt->input->cur = ctxt->input->base + current;
9682 ctxt->input->end =
9683 &ctxt->input->buf->buffer->content[
9684 ctxt->input->buf->buffer->use];
9685 }
9686 avail = ctxt->input->buf->buffer->use -
9687 (ctxt->input->cur - ctxt->input->base);
9688 }
Owen Taylor3473f882001-02-23 17:55:21 +00009689 if (avail < 1)
9690 goto done;
9691 switch (ctxt->instate) {
9692 case XML_PARSER_EOF:
9693 /*
9694 * Document parsing is done !
9695 */
9696 goto done;
9697 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009698 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9699 xmlChar start[4];
9700 xmlCharEncoding enc;
9701
9702 /*
9703 * Very first chars read from the document flow.
9704 */
9705 if (avail < 4)
9706 goto done;
9707
9708 /*
9709 * Get the 4 first bytes and decode the charset
9710 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009711 * plug some encoding conversion routines,
9712 * else xmlSwitchEncoding will set to (default)
9713 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009714 */
9715 start[0] = RAW;
9716 start[1] = NXT(1);
9717 start[2] = NXT(2);
9718 start[3] = NXT(3);
9719 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009720 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009721 break;
9722 }
Owen Taylor3473f882001-02-23 17:55:21 +00009723
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009724 if (avail < 2)
9725 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009726 cur = ctxt->input->cur[0];
9727 next = ctxt->input->cur[1];
9728 if (cur == 0) {
9729 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9730 ctxt->sax->setDocumentLocator(ctxt->userData,
9731 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009732 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009733 ctxt->instate = XML_PARSER_EOF;
9734#ifdef DEBUG_PUSH
9735 xmlGenericError(xmlGenericErrorContext,
9736 "PP: entering EOF\n");
9737#endif
9738 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9739 ctxt->sax->endDocument(ctxt->userData);
9740 goto done;
9741 }
9742 if ((cur == '<') && (next == '?')) {
9743 /* PI or XML decl */
9744 if (avail < 5) return(ret);
9745 if ((!terminate) &&
9746 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9747 return(ret);
9748 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9749 ctxt->sax->setDocumentLocator(ctxt->userData,
9750 &xmlDefaultSAXLocator);
9751 if ((ctxt->input->cur[2] == 'x') &&
9752 (ctxt->input->cur[3] == 'm') &&
9753 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009754 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009755 ret += 5;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: Parsing XML Decl\n");
9759#endif
9760 xmlParseXMLDecl(ctxt);
9761 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9762 /*
9763 * The XML REC instructs us to stop parsing right
9764 * here
9765 */
9766 ctxt->instate = XML_PARSER_EOF;
9767 return(0);
9768 }
9769 ctxt->standalone = ctxt->input->standalone;
9770 if ((ctxt->encoding == NULL) &&
9771 (ctxt->input->encoding != NULL))
9772 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9773 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9774 (!ctxt->disableSAX))
9775 ctxt->sax->startDocument(ctxt->userData);
9776 ctxt->instate = XML_PARSER_MISC;
9777#ifdef DEBUG_PUSH
9778 xmlGenericError(xmlGenericErrorContext,
9779 "PP: entering MISC\n");
9780#endif
9781 } else {
9782 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9783 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9784 (!ctxt->disableSAX))
9785 ctxt->sax->startDocument(ctxt->userData);
9786 ctxt->instate = XML_PARSER_MISC;
9787#ifdef DEBUG_PUSH
9788 xmlGenericError(xmlGenericErrorContext,
9789 "PP: entering MISC\n");
9790#endif
9791 }
9792 } else {
9793 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9794 ctxt->sax->setDocumentLocator(ctxt->userData,
9795 &xmlDefaultSAXLocator);
9796 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009797 if (ctxt->version == NULL) {
9798 xmlErrMemory(ctxt, NULL);
9799 break;
9800 }
Owen Taylor3473f882001-02-23 17:55:21 +00009801 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9802 (!ctxt->disableSAX))
9803 ctxt->sax->startDocument(ctxt->userData);
9804 ctxt->instate = XML_PARSER_MISC;
9805#ifdef DEBUG_PUSH
9806 xmlGenericError(xmlGenericErrorContext,
9807 "PP: entering MISC\n");
9808#endif
9809 }
9810 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009811 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009812 const xmlChar *name;
9813 const xmlChar *prefix;
9814 const xmlChar *URI;
9815 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009816
9817 if ((avail < 2) && (ctxt->inputNr == 1))
9818 goto done;
9819 cur = ctxt->input->cur[0];
9820 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009821 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009822 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009823 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9824 ctxt->sax->endDocument(ctxt->userData);
9825 goto done;
9826 }
9827 if (!terminate) {
9828 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009829 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009830 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009831 goto done;
9832 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9833 goto done;
9834 }
9835 }
9836 if (ctxt->spaceNr == 0)
9837 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009838 else if (*ctxt->space == -2)
9839 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009840 else
9841 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009842#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009843 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009844#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009845 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009846#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009847 else
9848 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009849#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009850 if (name == NULL) {
9851 spacePop(ctxt);
9852 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009853 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9854 ctxt->sax->endDocument(ctxt->userData);
9855 goto done;
9856 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009857#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009858 /*
9859 * [ VC: Root Element Type ]
9860 * The Name in the document type declaration must match
9861 * the element type of the root element.
9862 */
9863 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9864 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9865 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009866#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009867
9868 /*
9869 * Check for an Empty Element.
9870 */
9871 if ((RAW == '/') && (NXT(1) == '>')) {
9872 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009873
9874 if (ctxt->sax2) {
9875 if ((ctxt->sax != NULL) &&
9876 (ctxt->sax->endElementNs != NULL) &&
9877 (!ctxt->disableSAX))
9878 ctxt->sax->endElementNs(ctxt->userData, name,
9879 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009880 if (ctxt->nsNr - nsNr > 0)
9881 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009882#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009883 } else {
9884 if ((ctxt->sax != NULL) &&
9885 (ctxt->sax->endElement != NULL) &&
9886 (!ctxt->disableSAX))
9887 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009888#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009889 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009890 spacePop(ctxt);
9891 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009892 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009893 } else {
9894 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009895 }
9896 break;
9897 }
9898 if (RAW == '>') {
9899 NEXT;
9900 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009901 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009902 "Couldn't find end of Start Tag %s\n",
9903 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009904 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009905 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009906 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009907 if (ctxt->sax2)
9908 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009909#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009910 else
9911 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009912#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009913
Daniel Veillarda880b122003-04-21 21:36:41 +00009914 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009915 break;
9916 }
9917 case XML_PARSER_CONTENT: {
9918 const xmlChar *test;
9919 unsigned int cons;
9920 if ((avail < 2) && (ctxt->inputNr == 1))
9921 goto done;
9922 cur = ctxt->input->cur[0];
9923 next = ctxt->input->cur[1];
9924
9925 test = CUR_PTR;
9926 cons = ctxt->input->consumed;
9927 if ((cur == '<') && (next == '/')) {
9928 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009929 break;
9930 } else if ((cur == '<') && (next == '?')) {
9931 if ((!terminate) &&
9932 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9933 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009934 xmlParsePI(ctxt);
9935 } else if ((cur == '<') && (next != '!')) {
9936 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009937 break;
9938 } else if ((cur == '<') && (next == '!') &&
9939 (ctxt->input->cur[2] == '-') &&
9940 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009941 int term;
9942
9943 if (avail < 4)
9944 goto done;
9945 ctxt->input->cur += 4;
9946 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9947 ctxt->input->cur -= 4;
9948 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009949 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009950 xmlParseComment(ctxt);
9951 ctxt->instate = XML_PARSER_CONTENT;
9952 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9953 (ctxt->input->cur[2] == '[') &&
9954 (ctxt->input->cur[3] == 'C') &&
9955 (ctxt->input->cur[4] == 'D') &&
9956 (ctxt->input->cur[5] == 'A') &&
9957 (ctxt->input->cur[6] == 'T') &&
9958 (ctxt->input->cur[7] == 'A') &&
9959 (ctxt->input->cur[8] == '[')) {
9960 SKIP(9);
9961 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009962 break;
9963 } else if ((cur == '<') && (next == '!') &&
9964 (avail < 9)) {
9965 goto done;
9966 } else if (cur == '&') {
9967 if ((!terminate) &&
9968 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9969 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009970 xmlParseReference(ctxt);
9971 } else {
9972 /* TODO Avoid the extra copy, handle directly !!! */
9973 /*
9974 * Goal of the following test is:
9975 * - minimize calls to the SAX 'character' callback
9976 * when they are mergeable
9977 * - handle a problem for isBlank when we only parse
9978 * a sequence of blank chars and the next one is
9979 * not available to check against '<' presence.
9980 * - tries to homogenize the differences in SAX
9981 * callbacks between the push and pull versions
9982 * of the parser.
9983 */
9984 if ((ctxt->inputNr == 1) &&
9985 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9986 if (!terminate) {
9987 if (ctxt->progressive) {
9988 if ((lastlt == NULL) ||
9989 (ctxt->input->cur > lastlt))
9990 goto done;
9991 } else if (xmlParseLookupSequence(ctxt,
9992 '<', 0, 0) < 0) {
9993 goto done;
9994 }
9995 }
9996 }
9997 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009998 xmlParseCharData(ctxt, 0);
9999 }
10000 /*
10001 * Pop-up of finished entities.
10002 */
10003 while ((RAW == 0) && (ctxt->inputNr > 1))
10004 xmlPopInput(ctxt);
10005 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010006 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10007 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010008 ctxt->instate = XML_PARSER_EOF;
10009 break;
10010 }
10011 break;
10012 }
10013 case XML_PARSER_END_TAG:
10014 if (avail < 2)
10015 goto done;
10016 if (!terminate) {
10017 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010018 /* > can be found unescaped in attribute values */
10019 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010020 goto done;
10021 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10022 goto done;
10023 }
10024 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010025 if (ctxt->sax2) {
10026 xmlParseEndTag2(ctxt,
10027 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10028 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010029 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010030 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010031 }
10032#ifdef LIBXML_SAX1_ENABLED
10033 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010034 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010035#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010036 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010037 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010038 } else {
10039 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010040 }
10041 break;
10042 case XML_PARSER_CDATA_SECTION: {
10043 /*
10044 * The Push mode need to have the SAX callback for
10045 * cdataBlock merge back contiguous callbacks.
10046 */
10047 int base;
10048
10049 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10050 if (base < 0) {
10051 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010052 int tmp;
10053
10054 tmp = xmlCheckCdataPush(ctxt->input->cur,
10055 XML_PARSER_BIG_BUFFER_SIZE);
10056 if (tmp < 0) {
10057 tmp = -tmp;
10058 ctxt->input->cur += tmp;
10059 goto encoding_error;
10060 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010061 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10062 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010063 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010064 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010065 else if (ctxt->sax->characters != NULL)
10066 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010067 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010068 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010069 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010070 ctxt->checkIndex = 0;
10071 }
10072 goto done;
10073 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010074 int tmp;
10075
10076 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10077 if ((tmp < 0) || (tmp != base)) {
10078 tmp = -tmp;
10079 ctxt->input->cur += tmp;
10080 goto encoding_error;
10081 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010082 if ((ctxt->sax != NULL) && (base > 0) &&
10083 (!ctxt->disableSAX)) {
10084 if (ctxt->sax->cdataBlock != NULL)
10085 ctxt->sax->cdataBlock(ctxt->userData,
10086 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010087 else if (ctxt->sax->characters != NULL)
10088 ctxt->sax->characters(ctxt->userData,
10089 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010090 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010091 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010092 ctxt->checkIndex = 0;
10093 ctxt->instate = XML_PARSER_CONTENT;
10094#ifdef DEBUG_PUSH
10095 xmlGenericError(xmlGenericErrorContext,
10096 "PP: entering CONTENT\n");
10097#endif
10098 }
10099 break;
10100 }
Owen Taylor3473f882001-02-23 17:55:21 +000010101 case XML_PARSER_MISC:
10102 SKIP_BLANKS;
10103 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010104 avail = ctxt->input->length -
10105 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010106 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010107 avail = ctxt->input->buf->buffer->use -
10108 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010109 if (avail < 2)
10110 goto done;
10111 cur = ctxt->input->cur[0];
10112 next = ctxt->input->cur[1];
10113 if ((cur == '<') && (next == '?')) {
10114 if ((!terminate) &&
10115 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10116 goto done;
10117#ifdef DEBUG_PUSH
10118 xmlGenericError(xmlGenericErrorContext,
10119 "PP: Parsing PI\n");
10120#endif
10121 xmlParsePI(ctxt);
10122 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010123 (ctxt->input->cur[2] == '-') &&
10124 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010125 if ((!terminate) &&
10126 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10127 goto done;
10128#ifdef DEBUG_PUSH
10129 xmlGenericError(xmlGenericErrorContext,
10130 "PP: Parsing Comment\n");
10131#endif
10132 xmlParseComment(ctxt);
10133 ctxt->instate = XML_PARSER_MISC;
10134 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010135 (ctxt->input->cur[2] == 'D') &&
10136 (ctxt->input->cur[3] == 'O') &&
10137 (ctxt->input->cur[4] == 'C') &&
10138 (ctxt->input->cur[5] == 'T') &&
10139 (ctxt->input->cur[6] == 'Y') &&
10140 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010141 (ctxt->input->cur[8] == 'E')) {
10142 if ((!terminate) &&
10143 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10144 goto done;
10145#ifdef DEBUG_PUSH
10146 xmlGenericError(xmlGenericErrorContext,
10147 "PP: Parsing internal subset\n");
10148#endif
10149 ctxt->inSubset = 1;
10150 xmlParseDocTypeDecl(ctxt);
10151 if (RAW == '[') {
10152 ctxt->instate = XML_PARSER_DTD;
10153#ifdef DEBUG_PUSH
10154 xmlGenericError(xmlGenericErrorContext,
10155 "PP: entering DTD\n");
10156#endif
10157 } else {
10158 /*
10159 * Create and update the external subset.
10160 */
10161 ctxt->inSubset = 2;
10162 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10163 (ctxt->sax->externalSubset != NULL))
10164 ctxt->sax->externalSubset(ctxt->userData,
10165 ctxt->intSubName, ctxt->extSubSystem,
10166 ctxt->extSubURI);
10167 ctxt->inSubset = 0;
10168 ctxt->instate = XML_PARSER_PROLOG;
10169#ifdef DEBUG_PUSH
10170 xmlGenericError(xmlGenericErrorContext,
10171 "PP: entering PROLOG\n");
10172#endif
10173 }
10174 } else if ((cur == '<') && (next == '!') &&
10175 (avail < 9)) {
10176 goto done;
10177 } else {
10178 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010179 ctxt->progressive = 1;
10180 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010181#ifdef DEBUG_PUSH
10182 xmlGenericError(xmlGenericErrorContext,
10183 "PP: entering START_TAG\n");
10184#endif
10185 }
10186 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010187 case XML_PARSER_PROLOG:
10188 SKIP_BLANKS;
10189 if (ctxt->input->buf == NULL)
10190 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10191 else
10192 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10193 if (avail < 2)
10194 goto done;
10195 cur = ctxt->input->cur[0];
10196 next = ctxt->input->cur[1];
10197 if ((cur == '<') && (next == '?')) {
10198 if ((!terminate) &&
10199 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10200 goto done;
10201#ifdef DEBUG_PUSH
10202 xmlGenericError(xmlGenericErrorContext,
10203 "PP: Parsing PI\n");
10204#endif
10205 xmlParsePI(ctxt);
10206 } else if ((cur == '<') && (next == '!') &&
10207 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10208 if ((!terminate) &&
10209 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10210 goto done;
10211#ifdef DEBUG_PUSH
10212 xmlGenericError(xmlGenericErrorContext,
10213 "PP: Parsing Comment\n");
10214#endif
10215 xmlParseComment(ctxt);
10216 ctxt->instate = XML_PARSER_PROLOG;
10217 } else if ((cur == '<') && (next == '!') &&
10218 (avail < 4)) {
10219 goto done;
10220 } else {
10221 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010222 if (ctxt->progressive == 0)
10223 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010224 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010225#ifdef DEBUG_PUSH
10226 xmlGenericError(xmlGenericErrorContext,
10227 "PP: entering START_TAG\n");
10228#endif
10229 }
10230 break;
10231 case XML_PARSER_EPILOG:
10232 SKIP_BLANKS;
10233 if (ctxt->input->buf == NULL)
10234 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10235 else
10236 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10237 if (avail < 2)
10238 goto done;
10239 cur = ctxt->input->cur[0];
10240 next = ctxt->input->cur[1];
10241 if ((cur == '<') && (next == '?')) {
10242 if ((!terminate) &&
10243 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10244 goto done;
10245#ifdef DEBUG_PUSH
10246 xmlGenericError(xmlGenericErrorContext,
10247 "PP: Parsing PI\n");
10248#endif
10249 xmlParsePI(ctxt);
10250 ctxt->instate = XML_PARSER_EPILOG;
10251 } else if ((cur == '<') && (next == '!') &&
10252 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10253 if ((!terminate) &&
10254 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10255 goto done;
10256#ifdef DEBUG_PUSH
10257 xmlGenericError(xmlGenericErrorContext,
10258 "PP: Parsing Comment\n");
10259#endif
10260 xmlParseComment(ctxt);
10261 ctxt->instate = XML_PARSER_EPILOG;
10262 } else if ((cur == '<') && (next == '!') &&
10263 (avail < 4)) {
10264 goto done;
10265 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010266 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010267 ctxt->instate = XML_PARSER_EOF;
10268#ifdef DEBUG_PUSH
10269 xmlGenericError(xmlGenericErrorContext,
10270 "PP: entering EOF\n");
10271#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010272 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010273 ctxt->sax->endDocument(ctxt->userData);
10274 goto done;
10275 }
10276 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010277 case XML_PARSER_DTD: {
10278 /*
10279 * Sorry but progressive parsing of the internal subset
10280 * is not expected to be supported. We first check that
10281 * the full content of the internal subset is available and
10282 * the parsing is launched only at that point.
10283 * Internal subset ends up with "']' S? '>'" in an unescaped
10284 * section and not in a ']]>' sequence which are conditional
10285 * sections (whoever argued to keep that crap in XML deserve
10286 * a place in hell !).
10287 */
10288 int base, i;
10289 xmlChar *buf;
10290 xmlChar quote = 0;
10291
10292 base = ctxt->input->cur - ctxt->input->base;
10293 if (base < 0) return(0);
10294 if (ctxt->checkIndex > base)
10295 base = ctxt->checkIndex;
10296 buf = ctxt->input->buf->buffer->content;
10297 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10298 base++) {
10299 if (quote != 0) {
10300 if (buf[base] == quote)
10301 quote = 0;
10302 continue;
10303 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010304 if ((quote == 0) && (buf[base] == '<')) {
10305 int found = 0;
10306 /* special handling of comments */
10307 if (((unsigned int) base + 4 <
10308 ctxt->input->buf->buffer->use) &&
10309 (buf[base + 1] == '!') &&
10310 (buf[base + 2] == '-') &&
10311 (buf[base + 3] == '-')) {
10312 for (;(unsigned int) base + 3 <
10313 ctxt->input->buf->buffer->use; base++) {
10314 if ((buf[base] == '-') &&
10315 (buf[base + 1] == '-') &&
10316 (buf[base + 2] == '>')) {
10317 found = 1;
10318 base += 2;
10319 break;
10320 }
10321 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010322 if (!found) {
10323#if 0
10324 fprintf(stderr, "unfinished comment\n");
10325#endif
10326 break; /* for */
10327 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010328 continue;
10329 }
10330 }
Owen Taylor3473f882001-02-23 17:55:21 +000010331 if (buf[base] == '"') {
10332 quote = '"';
10333 continue;
10334 }
10335 if (buf[base] == '\'') {
10336 quote = '\'';
10337 continue;
10338 }
10339 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010340#if 0
10341 fprintf(stderr, "%c%c%c%c: ", buf[base],
10342 buf[base + 1], buf[base + 2], buf[base + 3]);
10343#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010344 if ((unsigned int) base +1 >=
10345 ctxt->input->buf->buffer->use)
10346 break;
10347 if (buf[base + 1] == ']') {
10348 /* conditional crap, skip both ']' ! */
10349 base++;
10350 continue;
10351 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010352 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010353 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10354 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010355 if (buf[base + i] == '>') {
10356#if 0
10357 fprintf(stderr, "found\n");
10358#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010359 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010360 }
10361 if (!IS_BLANK_CH(buf[base + i])) {
10362#if 0
10363 fprintf(stderr, "not found\n");
10364#endif
10365 goto not_end_of_int_subset;
10366 }
Owen Taylor3473f882001-02-23 17:55:21 +000010367 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010368#if 0
10369 fprintf(stderr, "end of stream\n");
10370#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010371 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010372
Owen Taylor3473f882001-02-23 17:55:21 +000010373 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010374not_end_of_int_subset:
10375 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010376 }
10377 /*
10378 * We didn't find the end of the Internal subset
10379 */
Owen Taylor3473f882001-02-23 17:55:21 +000010380#ifdef DEBUG_PUSH
10381 if (next == 0)
10382 xmlGenericError(xmlGenericErrorContext,
10383 "PP: lookup of int subset end filed\n");
10384#endif
10385 goto done;
10386
10387found_end_int_subset:
10388 xmlParseInternalSubset(ctxt);
10389 ctxt->inSubset = 2;
10390 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10391 (ctxt->sax->externalSubset != NULL))
10392 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10393 ctxt->extSubSystem, ctxt->extSubURI);
10394 ctxt->inSubset = 0;
10395 ctxt->instate = XML_PARSER_PROLOG;
10396 ctxt->checkIndex = 0;
10397#ifdef DEBUG_PUSH
10398 xmlGenericError(xmlGenericErrorContext,
10399 "PP: entering PROLOG\n");
10400#endif
10401 break;
10402 }
10403 case XML_PARSER_COMMENT:
10404 xmlGenericError(xmlGenericErrorContext,
10405 "PP: internal error, state == COMMENT\n");
10406 ctxt->instate = XML_PARSER_CONTENT;
10407#ifdef DEBUG_PUSH
10408 xmlGenericError(xmlGenericErrorContext,
10409 "PP: entering CONTENT\n");
10410#endif
10411 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010412 case XML_PARSER_IGNORE:
10413 xmlGenericError(xmlGenericErrorContext,
10414 "PP: internal error, state == IGNORE");
10415 ctxt->instate = XML_PARSER_DTD;
10416#ifdef DEBUG_PUSH
10417 xmlGenericError(xmlGenericErrorContext,
10418 "PP: entering DTD\n");
10419#endif
10420 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010421 case XML_PARSER_PI:
10422 xmlGenericError(xmlGenericErrorContext,
10423 "PP: internal error, state == PI\n");
10424 ctxt->instate = XML_PARSER_CONTENT;
10425#ifdef DEBUG_PUSH
10426 xmlGenericError(xmlGenericErrorContext,
10427 "PP: entering CONTENT\n");
10428#endif
10429 break;
10430 case XML_PARSER_ENTITY_DECL:
10431 xmlGenericError(xmlGenericErrorContext,
10432 "PP: internal error, state == ENTITY_DECL\n");
10433 ctxt->instate = XML_PARSER_DTD;
10434#ifdef DEBUG_PUSH
10435 xmlGenericError(xmlGenericErrorContext,
10436 "PP: entering DTD\n");
10437#endif
10438 break;
10439 case XML_PARSER_ENTITY_VALUE:
10440 xmlGenericError(xmlGenericErrorContext,
10441 "PP: internal error, state == ENTITY_VALUE\n");
10442 ctxt->instate = XML_PARSER_CONTENT;
10443#ifdef DEBUG_PUSH
10444 xmlGenericError(xmlGenericErrorContext,
10445 "PP: entering DTD\n");
10446#endif
10447 break;
10448 case XML_PARSER_ATTRIBUTE_VALUE:
10449 xmlGenericError(xmlGenericErrorContext,
10450 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10451 ctxt->instate = XML_PARSER_START_TAG;
10452#ifdef DEBUG_PUSH
10453 xmlGenericError(xmlGenericErrorContext,
10454 "PP: entering START_TAG\n");
10455#endif
10456 break;
10457 case XML_PARSER_SYSTEM_LITERAL:
10458 xmlGenericError(xmlGenericErrorContext,
10459 "PP: internal error, state == SYSTEM_LITERAL\n");
10460 ctxt->instate = XML_PARSER_START_TAG;
10461#ifdef DEBUG_PUSH
10462 xmlGenericError(xmlGenericErrorContext,
10463 "PP: entering START_TAG\n");
10464#endif
10465 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010466 case XML_PARSER_PUBLIC_LITERAL:
10467 xmlGenericError(xmlGenericErrorContext,
10468 "PP: internal error, state == PUBLIC_LITERAL\n");
10469 ctxt->instate = XML_PARSER_START_TAG;
10470#ifdef DEBUG_PUSH
10471 xmlGenericError(xmlGenericErrorContext,
10472 "PP: entering START_TAG\n");
10473#endif
10474 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
10476 }
10477done:
10478#ifdef DEBUG_PUSH
10479 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10480#endif
10481 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010482encoding_error:
10483 {
10484 char buffer[150];
10485
10486 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10487 ctxt->input->cur[0], ctxt->input->cur[1],
10488 ctxt->input->cur[2], ctxt->input->cur[3]);
10489 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10490 "Input is not proper UTF-8, indicate encoding !\n%s",
10491 BAD_CAST buffer, NULL);
10492 }
10493 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010494}
10495
10496/**
Owen Taylor3473f882001-02-23 17:55:21 +000010497 * xmlParseChunk:
10498 * @ctxt: an XML parser context
10499 * @chunk: an char array
10500 * @size: the size in byte of the chunk
10501 * @terminate: last chunk indicator
10502 *
10503 * Parse a Chunk of memory
10504 *
10505 * Returns zero if no error, the xmlParserErrors otherwise.
10506 */
10507int
10508xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10509 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010510 int end_in_lf = 0;
10511
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010512 if (ctxt == NULL)
10513 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010514 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010515 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010516 if (ctxt->instate == XML_PARSER_START)
10517 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010518 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10519 (chunk[size - 1] == '\r')) {
10520 end_in_lf = 1;
10521 size--;
10522 }
Owen Taylor3473f882001-02-23 17:55:21 +000010523 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10524 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10525 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10526 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010527 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010528
William M. Bracka3215c72004-07-31 16:24:01 +000010529 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10530 if (res < 0) {
10531 ctxt->errNo = XML_PARSER_EOF;
10532 ctxt->disableSAX = 1;
10533 return (XML_PARSER_EOF);
10534 }
Owen Taylor3473f882001-02-23 17:55:21 +000010535 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10536 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010537 ctxt->input->end =
10538 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010539#ifdef DEBUG_PUSH
10540 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10541#endif
10542
Owen Taylor3473f882001-02-23 17:55:21 +000010543 } else if (ctxt->instate != XML_PARSER_EOF) {
10544 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10545 xmlParserInputBufferPtr in = ctxt->input->buf;
10546 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10547 (in->raw != NULL)) {
10548 int nbchars;
10549
10550 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10551 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010552 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010553 xmlGenericError(xmlGenericErrorContext,
10554 "xmlParseChunk: encoder error\n");
10555 return(XML_ERR_INVALID_ENCODING);
10556 }
10557 }
10558 }
10559 }
10560 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010561 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10562 (ctxt->input->buf != NULL)) {
10563 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10564 }
Daniel Veillard14412512005-01-21 23:53:26 +000010565 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010566 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010567 if (terminate) {
10568 /*
10569 * Check for termination
10570 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010571 int avail = 0;
10572
10573 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010574 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010575 avail = ctxt->input->length -
10576 (ctxt->input->cur - ctxt->input->base);
10577 else
10578 avail = ctxt->input->buf->buffer->use -
10579 (ctxt->input->cur - ctxt->input->base);
10580 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010581
Owen Taylor3473f882001-02-23 17:55:21 +000010582 if ((ctxt->instate != XML_PARSER_EOF) &&
10583 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010584 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010585 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010586 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010587 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010588 }
Owen Taylor3473f882001-02-23 17:55:21 +000010589 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010590 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010591 ctxt->sax->endDocument(ctxt->userData);
10592 }
10593 ctxt->instate = XML_PARSER_EOF;
10594 }
10595 return((xmlParserErrors) ctxt->errNo);
10596}
10597
10598/************************************************************************
10599 * *
10600 * I/O front end functions to the parser *
10601 * *
10602 ************************************************************************/
10603
10604/**
Owen Taylor3473f882001-02-23 17:55:21 +000010605 * xmlCreatePushParserCtxt:
10606 * @sax: a SAX handler
10607 * @user_data: The user data returned on SAX callbacks
10608 * @chunk: a pointer to an array of chars
10609 * @size: number of chars in the array
10610 * @filename: an optional file name or URI
10611 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010612 * Create a parser context for using the XML parser in push mode.
10613 * If @buffer and @size are non-NULL, the data is used to detect
10614 * the encoding. The remaining characters will be parsed so they
10615 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010616 * To allow content encoding detection, @size should be >= 4
10617 * The value of @filename is used for fetching external entities
10618 * and error/warning reports.
10619 *
10620 * Returns the new parser context or NULL
10621 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010622
Owen Taylor3473f882001-02-23 17:55:21 +000010623xmlParserCtxtPtr
10624xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10625 const char *chunk, int size, const char *filename) {
10626 xmlParserCtxtPtr ctxt;
10627 xmlParserInputPtr inputStream;
10628 xmlParserInputBufferPtr buf;
10629 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10630
10631 /*
10632 * plug some encoding conversion routines
10633 */
10634 if ((chunk != NULL) && (size >= 4))
10635 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10636
10637 buf = xmlAllocParserInputBuffer(enc);
10638 if (buf == NULL) return(NULL);
10639
10640 ctxt = xmlNewParserCtxt();
10641 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010642 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010643 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010644 return(NULL);
10645 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010646 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010647 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10648 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010649 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010650 xmlFreeParserInputBuffer(buf);
10651 xmlFreeParserCtxt(ctxt);
10652 return(NULL);
10653 }
Owen Taylor3473f882001-02-23 17:55:21 +000010654 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010655#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010656 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010657#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010658 xmlFree(ctxt->sax);
10659 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10660 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010661 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010662 xmlFreeParserInputBuffer(buf);
10663 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010664 return(NULL);
10665 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010666 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10667 if (sax->initialized == XML_SAX2_MAGIC)
10668 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10669 else
10670 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010671 if (user_data != NULL)
10672 ctxt->userData = user_data;
10673 }
10674 if (filename == NULL) {
10675 ctxt->directory = NULL;
10676 } else {
10677 ctxt->directory = xmlParserGetDirectory(filename);
10678 }
10679
10680 inputStream = xmlNewInputStream(ctxt);
10681 if (inputStream == NULL) {
10682 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010683 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010684 return(NULL);
10685 }
10686
10687 if (filename == NULL)
10688 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010689 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010690 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010691 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010692 if (inputStream->filename == NULL) {
10693 xmlFreeParserCtxt(ctxt);
10694 xmlFreeParserInputBuffer(buf);
10695 return(NULL);
10696 }
10697 }
Owen Taylor3473f882001-02-23 17:55:21 +000010698 inputStream->buf = buf;
10699 inputStream->base = inputStream->buf->buffer->content;
10700 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010701 inputStream->end =
10702 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010703
10704 inputPush(ctxt, inputStream);
10705
William M. Brack3a1cd212005-02-11 14:35:54 +000010706 /*
10707 * If the caller didn't provide an initial 'chunk' for determining
10708 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10709 * that it can be automatically determined later
10710 */
10711 if ((size == 0) || (chunk == NULL)) {
10712 ctxt->charset = XML_CHAR_ENCODING_NONE;
10713 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010714 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10715 int cur = ctxt->input->cur - ctxt->input->base;
10716
Owen Taylor3473f882001-02-23 17:55:21 +000010717 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010718
10719 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10720 ctxt->input->cur = ctxt->input->base + cur;
10721 ctxt->input->end =
10722 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010723#ifdef DEBUG_PUSH
10724 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10725#endif
10726 }
10727
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010728 if (enc != XML_CHAR_ENCODING_NONE) {
10729 xmlSwitchEncoding(ctxt, enc);
10730 }
10731
Owen Taylor3473f882001-02-23 17:55:21 +000010732 return(ctxt);
10733}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010734#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010735
10736/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010737 * xmlStopParser:
10738 * @ctxt: an XML parser context
10739 *
10740 * Blocks further parser processing
10741 */
10742void
10743xmlStopParser(xmlParserCtxtPtr ctxt) {
10744 if (ctxt == NULL)
10745 return;
10746 ctxt->instate = XML_PARSER_EOF;
10747 ctxt->disableSAX = 1;
10748 if (ctxt->input != NULL) {
10749 ctxt->input->cur = BAD_CAST"";
10750 ctxt->input->base = ctxt->input->cur;
10751 }
10752}
10753
10754/**
Owen Taylor3473f882001-02-23 17:55:21 +000010755 * xmlCreateIOParserCtxt:
10756 * @sax: a SAX handler
10757 * @user_data: The user data returned on SAX callbacks
10758 * @ioread: an I/O read function
10759 * @ioclose: an I/O close function
10760 * @ioctx: an I/O handler
10761 * @enc: the charset encoding if known
10762 *
10763 * Create a parser context for using the XML parser with an existing
10764 * I/O stream
10765 *
10766 * Returns the new parser context or NULL
10767 */
10768xmlParserCtxtPtr
10769xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10770 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10771 void *ioctx, xmlCharEncoding enc) {
10772 xmlParserCtxtPtr ctxt;
10773 xmlParserInputPtr inputStream;
10774 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010775
10776 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010777
10778 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10779 if (buf == NULL) return(NULL);
10780
10781 ctxt = xmlNewParserCtxt();
10782 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010783 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010784 return(NULL);
10785 }
10786 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010787#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010788 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010789#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010790 xmlFree(ctxt->sax);
10791 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10792 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010793 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010794 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010795 return(NULL);
10796 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010797 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10798 if (sax->initialized == XML_SAX2_MAGIC)
10799 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10800 else
10801 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010802 if (user_data != NULL)
10803 ctxt->userData = user_data;
10804 }
10805
10806 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10807 if (inputStream == NULL) {
10808 xmlFreeParserCtxt(ctxt);
10809 return(NULL);
10810 }
10811 inputPush(ctxt, inputStream);
10812
10813 return(ctxt);
10814}
10815
Daniel Veillard4432df22003-09-28 18:58:27 +000010816#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010817/************************************************************************
10818 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010819 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010820 * *
10821 ************************************************************************/
10822
10823/**
10824 * xmlIOParseDTD:
10825 * @sax: the SAX handler block or NULL
10826 * @input: an Input Buffer
10827 * @enc: the charset encoding if known
10828 *
10829 * Load and parse a DTD
10830 *
10831 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010832 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010833 */
10834
10835xmlDtdPtr
10836xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10837 xmlCharEncoding enc) {
10838 xmlDtdPtr ret = NULL;
10839 xmlParserCtxtPtr ctxt;
10840 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010841 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010842
10843 if (input == NULL)
10844 return(NULL);
10845
10846 ctxt = xmlNewParserCtxt();
10847 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010848 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010849 return(NULL);
10850 }
10851
10852 /*
10853 * Set-up the SAX context
10854 */
10855 if (sax != NULL) {
10856 if (ctxt->sax != NULL)
10857 xmlFree(ctxt->sax);
10858 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010859 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010860 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010861 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010862
10863 /*
10864 * generate a parser input from the I/O handler
10865 */
10866
Daniel Veillard43caefb2003-12-07 19:32:22 +000010867 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010868 if (pinput == NULL) {
10869 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010870 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010871 xmlFreeParserCtxt(ctxt);
10872 return(NULL);
10873 }
10874
10875 /*
10876 * plug some encoding conversion routines here.
10877 */
10878 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010879 if (enc != XML_CHAR_ENCODING_NONE) {
10880 xmlSwitchEncoding(ctxt, enc);
10881 }
Owen Taylor3473f882001-02-23 17:55:21 +000010882
10883 pinput->filename = NULL;
10884 pinput->line = 1;
10885 pinput->col = 1;
10886 pinput->base = ctxt->input->cur;
10887 pinput->cur = ctxt->input->cur;
10888 pinput->free = NULL;
10889
10890 /*
10891 * let's parse that entity knowing it's an external subset.
10892 */
10893 ctxt->inSubset = 2;
10894 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10895 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10896 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010897
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010898 if ((enc == XML_CHAR_ENCODING_NONE) &&
10899 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010900 /*
10901 * Get the 4 first bytes and decode the charset
10902 * if enc != XML_CHAR_ENCODING_NONE
10903 * plug some encoding conversion routines.
10904 */
10905 start[0] = RAW;
10906 start[1] = NXT(1);
10907 start[2] = NXT(2);
10908 start[3] = NXT(3);
10909 enc = xmlDetectCharEncoding(start, 4);
10910 if (enc != XML_CHAR_ENCODING_NONE) {
10911 xmlSwitchEncoding(ctxt, enc);
10912 }
10913 }
10914
Owen Taylor3473f882001-02-23 17:55:21 +000010915 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10916
10917 if (ctxt->myDoc != NULL) {
10918 if (ctxt->wellFormed) {
10919 ret = ctxt->myDoc->extSubset;
10920 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010921 if (ret != NULL) {
10922 xmlNodePtr tmp;
10923
10924 ret->doc = NULL;
10925 tmp = ret->children;
10926 while (tmp != NULL) {
10927 tmp->doc = NULL;
10928 tmp = tmp->next;
10929 }
10930 }
Owen Taylor3473f882001-02-23 17:55:21 +000010931 } else {
10932 ret = NULL;
10933 }
10934 xmlFreeDoc(ctxt->myDoc);
10935 ctxt->myDoc = NULL;
10936 }
10937 if (sax != NULL) ctxt->sax = NULL;
10938 xmlFreeParserCtxt(ctxt);
10939
10940 return(ret);
10941}
10942
10943/**
10944 * xmlSAXParseDTD:
10945 * @sax: the SAX handler block
10946 * @ExternalID: a NAME* containing the External ID of the DTD
10947 * @SystemID: a NAME* containing the URL to the DTD
10948 *
10949 * Load and parse an external subset.
10950 *
10951 * Returns the resulting xmlDtdPtr or NULL in case of error.
10952 */
10953
10954xmlDtdPtr
10955xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10956 const xmlChar *SystemID) {
10957 xmlDtdPtr ret = NULL;
10958 xmlParserCtxtPtr ctxt;
10959 xmlParserInputPtr input = NULL;
10960 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010961 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010962
10963 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10964
10965 ctxt = xmlNewParserCtxt();
10966 if (ctxt == NULL) {
10967 return(NULL);
10968 }
10969
10970 /*
10971 * Set-up the SAX context
10972 */
10973 if (sax != NULL) {
10974 if (ctxt->sax != NULL)
10975 xmlFree(ctxt->sax);
10976 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010977 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010978 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010979
10980 /*
10981 * Canonicalise the system ID
10982 */
10983 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010984 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010985 xmlFreeParserCtxt(ctxt);
10986 return(NULL);
10987 }
Owen Taylor3473f882001-02-23 17:55:21 +000010988
10989 /*
10990 * Ask the Entity resolver to load the damn thing
10991 */
10992
10993 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000010994 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
10995 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010996 if (input == NULL) {
10997 if (sax != NULL) ctxt->sax = NULL;
10998 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010999 if (systemIdCanonic != NULL)
11000 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011001 return(NULL);
11002 }
11003
11004 /*
11005 * plug some encoding conversion routines here.
11006 */
11007 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011008 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11009 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11010 xmlSwitchEncoding(ctxt, enc);
11011 }
Owen Taylor3473f882001-02-23 17:55:21 +000011012
11013 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011014 input->filename = (char *) systemIdCanonic;
11015 else
11016 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011017 input->line = 1;
11018 input->col = 1;
11019 input->base = ctxt->input->cur;
11020 input->cur = ctxt->input->cur;
11021 input->free = NULL;
11022
11023 /*
11024 * let's parse that entity knowing it's an external subset.
11025 */
11026 ctxt->inSubset = 2;
11027 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11028 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11029 ExternalID, SystemID);
11030 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11031
11032 if (ctxt->myDoc != NULL) {
11033 if (ctxt->wellFormed) {
11034 ret = ctxt->myDoc->extSubset;
11035 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011036 if (ret != NULL) {
11037 xmlNodePtr tmp;
11038
11039 ret->doc = NULL;
11040 tmp = ret->children;
11041 while (tmp != NULL) {
11042 tmp->doc = NULL;
11043 tmp = tmp->next;
11044 }
11045 }
Owen Taylor3473f882001-02-23 17:55:21 +000011046 } else {
11047 ret = NULL;
11048 }
11049 xmlFreeDoc(ctxt->myDoc);
11050 ctxt->myDoc = NULL;
11051 }
11052 if (sax != NULL) ctxt->sax = NULL;
11053 xmlFreeParserCtxt(ctxt);
11054
11055 return(ret);
11056}
11057
Daniel Veillard4432df22003-09-28 18:58:27 +000011058
Owen Taylor3473f882001-02-23 17:55:21 +000011059/**
11060 * xmlParseDTD:
11061 * @ExternalID: a NAME* containing the External ID of the DTD
11062 * @SystemID: a NAME* containing the URL to the DTD
11063 *
11064 * Load and parse an external subset.
11065 *
11066 * Returns the resulting xmlDtdPtr or NULL in case of error.
11067 */
11068
11069xmlDtdPtr
11070xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11071 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11072}
Daniel Veillard4432df22003-09-28 18:58:27 +000011073#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011074
11075/************************************************************************
11076 * *
11077 * Front ends when parsing an Entity *
11078 * *
11079 ************************************************************************/
11080
11081/**
Owen Taylor3473f882001-02-23 17:55:21 +000011082 * xmlParseCtxtExternalEntity:
11083 * @ctx: the existing parsing context
11084 * @URL: the URL for the entity to load
11085 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011086 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011087 *
11088 * Parse an external general entity within an existing parsing context
11089 * An external general parsed entity is well-formed if it matches the
11090 * production labeled extParsedEnt.
11091 *
11092 * [78] extParsedEnt ::= TextDecl? content
11093 *
11094 * Returns 0 if the entity is well formed, -1 in case of args problem and
11095 * the parser error code otherwise
11096 */
11097
11098int
11099xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011100 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011101 xmlParserCtxtPtr ctxt;
11102 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011103 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011104 xmlSAXHandlerPtr oldsax = NULL;
11105 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011106 xmlChar start[4];
11107 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011108 xmlParserInputPtr inputStream;
11109 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011110
Daniel Veillardce682bc2004-11-05 17:22:25 +000011111 if (ctx == NULL) return(-1);
11112
Owen Taylor3473f882001-02-23 17:55:21 +000011113 if (ctx->depth > 40) {
11114 return(XML_ERR_ENTITY_LOOP);
11115 }
11116
Daniel Veillardcda96922001-08-21 10:56:31 +000011117 if (lst != NULL)
11118 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011119 if ((URL == NULL) && (ID == NULL))
11120 return(-1);
11121 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11122 return(-1);
11123
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011124 ctxt = xmlNewParserCtxt();
11125 if (ctxt == NULL) {
11126 return(-1);
11127 }
11128
Owen Taylor3473f882001-02-23 17:55:21 +000011129 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011130 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011131
11132 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11133 if (inputStream == NULL) {
11134 xmlFreeParserCtxt(ctxt);
11135 return(-1);
11136 }
11137
11138 inputPush(ctxt, inputStream);
11139
11140 if ((ctxt->directory == NULL) && (directory == NULL))
11141 directory = xmlParserGetDirectory((char *)URL);
11142 if ((ctxt->directory == NULL) && (directory != NULL))
11143 ctxt->directory = directory;
11144
Owen Taylor3473f882001-02-23 17:55:21 +000011145 oldsax = ctxt->sax;
11146 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011147 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011148 newDoc = xmlNewDoc(BAD_CAST "1.0");
11149 if (newDoc == NULL) {
11150 xmlFreeParserCtxt(ctxt);
11151 return(-1);
11152 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011153 if (ctx->myDoc->dict) {
11154 newDoc->dict = ctx->myDoc->dict;
11155 xmlDictReference(newDoc->dict);
11156 }
Owen Taylor3473f882001-02-23 17:55:21 +000011157 if (ctx->myDoc != NULL) {
11158 newDoc->intSubset = ctx->myDoc->intSubset;
11159 newDoc->extSubset = ctx->myDoc->extSubset;
11160 }
11161 if (ctx->myDoc->URL != NULL) {
11162 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11163 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011164 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11165 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011166 ctxt->sax = oldsax;
11167 xmlFreeParserCtxt(ctxt);
11168 newDoc->intSubset = NULL;
11169 newDoc->extSubset = NULL;
11170 xmlFreeDoc(newDoc);
11171 return(-1);
11172 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011173 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011174 nodePush(ctxt, newDoc->children);
11175 if (ctx->myDoc == NULL) {
11176 ctxt->myDoc = newDoc;
11177 } else {
11178 ctxt->myDoc = ctx->myDoc;
11179 newDoc->children->doc = ctx->myDoc;
11180 }
11181
Daniel Veillard87a764e2001-06-20 17:41:10 +000011182 /*
11183 * Get the 4 first bytes and decode the charset
11184 * if enc != XML_CHAR_ENCODING_NONE
11185 * plug some encoding conversion routines.
11186 */
11187 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011188 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11189 start[0] = RAW;
11190 start[1] = NXT(1);
11191 start[2] = NXT(2);
11192 start[3] = NXT(3);
11193 enc = xmlDetectCharEncoding(start, 4);
11194 if (enc != XML_CHAR_ENCODING_NONE) {
11195 xmlSwitchEncoding(ctxt, enc);
11196 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011197 }
11198
Owen Taylor3473f882001-02-23 17:55:21 +000011199 /*
11200 * Parse a possible text declaration first
11201 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011202 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011203 xmlParseTextDecl(ctxt);
11204 }
11205
11206 /*
11207 * Doing validity checking on chunk doesn't make sense
11208 */
11209 ctxt->instate = XML_PARSER_CONTENT;
11210 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011211 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011212 ctxt->loadsubset = ctx->loadsubset;
11213 ctxt->depth = ctx->depth + 1;
11214 ctxt->replaceEntities = ctx->replaceEntities;
11215 if (ctxt->validate) {
11216 ctxt->vctxt.error = ctx->vctxt.error;
11217 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011218 } else {
11219 ctxt->vctxt.error = NULL;
11220 ctxt->vctxt.warning = NULL;
11221 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011222 ctxt->vctxt.nodeTab = NULL;
11223 ctxt->vctxt.nodeNr = 0;
11224 ctxt->vctxt.nodeMax = 0;
11225 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011226 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11227 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011228 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11229 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11230 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011231 ctxt->dictNames = ctx->dictNames;
11232 ctxt->attsDefault = ctx->attsDefault;
11233 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011234 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011235
11236 xmlParseContent(ctxt);
11237
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011238 ctx->validate = ctxt->validate;
11239 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011240 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011241 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011242 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011243 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011244 }
11245 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011246 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011247 }
11248
11249 if (!ctxt->wellFormed) {
11250 if (ctxt->errNo == 0)
11251 ret = 1;
11252 else
11253 ret = ctxt->errNo;
11254 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011255 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011256 xmlNodePtr cur;
11257
11258 /*
11259 * Return the newly created nodeset after unlinking it from
11260 * they pseudo parent.
11261 */
11262 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011263 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011264 while (cur != NULL) {
11265 cur->parent = NULL;
11266 cur = cur->next;
11267 }
11268 newDoc->children->children = NULL;
11269 }
11270 ret = 0;
11271 }
11272 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011273 ctxt->dict = NULL;
11274 ctxt->attsDefault = NULL;
11275 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011276 xmlFreeParserCtxt(ctxt);
11277 newDoc->intSubset = NULL;
11278 newDoc->extSubset = NULL;
11279 xmlFreeDoc(newDoc);
11280
11281 return(ret);
11282}
11283
11284/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011285 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011286 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011287 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011288 * @sax: the SAX handler bloc (possibly NULL)
11289 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11290 * @depth: Used for loop detection, use 0
11291 * @URL: the URL for the entity to load
11292 * @ID: the System ID for the entity to load
11293 * @list: the return value for the set of parsed nodes
11294 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011295 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011296 *
11297 * Returns 0 if the entity is well formed, -1 in case of args problem and
11298 * the parser error code otherwise
11299 */
11300
Daniel Veillard7d515752003-09-26 19:12:37 +000011301static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011302xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11303 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011304 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011305 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011306 xmlParserCtxtPtr ctxt;
11307 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011308 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011309 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011310 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011311 xmlChar start[4];
11312 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011313
11314 if (depth > 40) {
11315 return(XML_ERR_ENTITY_LOOP);
11316 }
11317
11318
11319
11320 if (list != NULL)
11321 *list = NULL;
11322 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011323 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011324 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011325 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011326
11327
11328 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011329 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011330 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011331 if (oldctxt != NULL) {
11332 ctxt->_private = oldctxt->_private;
11333 ctxt->loadsubset = oldctxt->loadsubset;
11334 ctxt->validate = oldctxt->validate;
11335 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011336 ctxt->record_info = oldctxt->record_info;
11337 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11338 ctxt->node_seq.length = oldctxt->node_seq.length;
11339 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011340 } else {
11341 /*
11342 * Doing validity checking on chunk without context
11343 * doesn't make sense
11344 */
11345 ctxt->_private = NULL;
11346 ctxt->validate = 0;
11347 ctxt->external = 2;
11348 ctxt->loadsubset = 0;
11349 }
Owen Taylor3473f882001-02-23 17:55:21 +000011350 if (sax != NULL) {
11351 oldsax = ctxt->sax;
11352 ctxt->sax = sax;
11353 if (user_data != NULL)
11354 ctxt->userData = user_data;
11355 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011356 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011357 newDoc = xmlNewDoc(BAD_CAST "1.0");
11358 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011359 ctxt->node_seq.maximum = 0;
11360 ctxt->node_seq.length = 0;
11361 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011362 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011363 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011364 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011365 newDoc->intSubset = doc->intSubset;
11366 newDoc->extSubset = doc->extSubset;
11367 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011368 xmlDictReference(newDoc->dict);
11369
Owen Taylor3473f882001-02-23 17:55:21 +000011370 if (doc->URL != NULL) {
11371 newDoc->URL = xmlStrdup(doc->URL);
11372 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011373 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11374 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011375 if (sax != NULL)
11376 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011377 ctxt->node_seq.maximum = 0;
11378 ctxt->node_seq.length = 0;
11379 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011380 xmlFreeParserCtxt(ctxt);
11381 newDoc->intSubset = NULL;
11382 newDoc->extSubset = NULL;
11383 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011384 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011385 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011386 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011387 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011388 ctxt->myDoc = doc;
11389 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011390
Daniel Veillard87a764e2001-06-20 17:41:10 +000011391 /*
11392 * Get the 4 first bytes and decode the charset
11393 * if enc != XML_CHAR_ENCODING_NONE
11394 * plug some encoding conversion routines.
11395 */
11396 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011397 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11398 start[0] = RAW;
11399 start[1] = NXT(1);
11400 start[2] = NXT(2);
11401 start[3] = NXT(3);
11402 enc = xmlDetectCharEncoding(start, 4);
11403 if (enc != XML_CHAR_ENCODING_NONE) {
11404 xmlSwitchEncoding(ctxt, enc);
11405 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011406 }
11407
Owen Taylor3473f882001-02-23 17:55:21 +000011408 /*
11409 * Parse a possible text declaration first
11410 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011411 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011412 xmlParseTextDecl(ctxt);
11413 }
11414
Owen Taylor3473f882001-02-23 17:55:21 +000011415 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011416 ctxt->depth = depth;
11417
11418 xmlParseContent(ctxt);
11419
Daniel Veillard561b7f82002-03-20 21:55:57 +000011420 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011421 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011422 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011423 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011424 }
11425 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011426 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011427 }
11428
11429 if (!ctxt->wellFormed) {
11430 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011431 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011432 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011433 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011434 } else {
11435 if (list != NULL) {
11436 xmlNodePtr cur;
11437
11438 /*
11439 * Return the newly created nodeset after unlinking it from
11440 * they pseudo parent.
11441 */
11442 cur = newDoc->children->children;
11443 *list = cur;
11444 while (cur != NULL) {
11445 cur->parent = NULL;
11446 cur = cur->next;
11447 }
11448 newDoc->children->children = NULL;
11449 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011450 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011451 }
11452 if (sax != NULL)
11453 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011454 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11455 oldctxt->node_seq.length = ctxt->node_seq.length;
11456 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011457 ctxt->node_seq.maximum = 0;
11458 ctxt->node_seq.length = 0;
11459 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011460 xmlFreeParserCtxt(ctxt);
11461 newDoc->intSubset = NULL;
11462 newDoc->extSubset = NULL;
11463 xmlFreeDoc(newDoc);
11464
11465 return(ret);
11466}
11467
Daniel Veillard81273902003-09-30 00:43:48 +000011468#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011469/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011470 * xmlParseExternalEntity:
11471 * @doc: the document the chunk pertains to
11472 * @sax: the SAX handler bloc (possibly NULL)
11473 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11474 * @depth: Used for loop detection, use 0
11475 * @URL: the URL for the entity to load
11476 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011477 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011478 *
11479 * Parse an external general entity
11480 * An external general parsed entity is well-formed if it matches the
11481 * production labeled extParsedEnt.
11482 *
11483 * [78] extParsedEnt ::= TextDecl? content
11484 *
11485 * Returns 0 if the entity is well formed, -1 in case of args problem and
11486 * the parser error code otherwise
11487 */
11488
11489int
11490xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011491 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011492 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011493 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011494}
11495
11496/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011497 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011498 * @doc: the document the chunk pertains to
11499 * @sax: the SAX handler bloc (possibly NULL)
11500 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11501 * @depth: Used for loop detection, use 0
11502 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011503 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011504 *
11505 * Parse a well-balanced chunk of an XML document
11506 * called by the parser
11507 * The allowed sequence for the Well Balanced Chunk is the one defined by
11508 * the content production in the XML grammar:
11509 *
11510 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11511 *
11512 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11513 * the parser error code otherwise
11514 */
11515
11516int
11517xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011518 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011519 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11520 depth, string, lst, 0 );
11521}
Daniel Veillard81273902003-09-30 00:43:48 +000011522#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011523
11524/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011525 * xmlParseBalancedChunkMemoryInternal:
11526 * @oldctxt: the existing parsing context
11527 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11528 * @user_data: the user data field for the parser context
11529 * @lst: the return value for the set of parsed nodes
11530 *
11531 *
11532 * Parse a well-balanced chunk of an XML document
11533 * called by the parser
11534 * The allowed sequence for the Well Balanced Chunk is the one defined by
11535 * the content production in the XML grammar:
11536 *
11537 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11538 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011539 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11540 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011541 *
11542 * In case recover is set to 1, the nodelist will not be empty even if
11543 * the parsed chunk is not well balanced.
11544 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011545static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011546xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11547 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11548 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011549 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011550 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011551 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011552 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011553 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011554 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011555 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011556
11557 if (oldctxt->depth > 40) {
11558 return(XML_ERR_ENTITY_LOOP);
11559 }
11560
11561
11562 if (lst != NULL)
11563 *lst = NULL;
11564 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011565 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011566
11567 size = xmlStrlen(string);
11568
11569 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011570 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011571 if (user_data != NULL)
11572 ctxt->userData = user_data;
11573 else
11574 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011575 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11576 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011577 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11578 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11579 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011580
11581 oldsax = ctxt->sax;
11582 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011583 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011584 ctxt->replaceEntities = oldctxt->replaceEntities;
11585 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011586
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011587 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011588 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011589 newDoc = xmlNewDoc(BAD_CAST "1.0");
11590 if (newDoc == NULL) {
11591 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011592 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011593 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011594 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011595 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011596 newDoc->dict = ctxt->dict;
11597 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011598 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011599 } else {
11600 ctxt->myDoc = oldctxt->myDoc;
11601 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011602 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011603 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011604 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11605 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011606 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011607 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011608 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011609 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011610 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011611 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011612 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011613 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011614 ctxt->myDoc->children = NULL;
11615 ctxt->myDoc->last = NULL;
11616 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011617 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011618 ctxt->instate = XML_PARSER_CONTENT;
11619 ctxt->depth = oldctxt->depth + 1;
11620
Daniel Veillard328f48c2002-11-15 15:24:34 +000011621 ctxt->validate = 0;
11622 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011623 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11624 /*
11625 * ID/IDREF registration will be done in xmlValidateElement below
11626 */
11627 ctxt->loadsubset |= XML_SKIP_IDS;
11628 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011629 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011630 ctxt->attsDefault = oldctxt->attsDefault;
11631 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011632
Daniel Veillard68e9e742002-11-16 15:35:11 +000011633 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011634 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011635 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011636 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011637 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011638 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011639 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011640 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011641 }
11642
11643 if (!ctxt->wellFormed) {
11644 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011645 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011646 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011647 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011648 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011649 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011650 }
11651
William M. Brack7b9154b2003-09-27 19:23:50 +000011652 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011653 xmlNodePtr cur;
11654
11655 /*
11656 * Return the newly created nodeset after unlinking it from
11657 * they pseudo parent.
11658 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011659 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011660 *lst = cur;
11661 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011662#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011663 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11664 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11665 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011666 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11667 oldctxt->myDoc, cur);
11668 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011669#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011670 cur->parent = NULL;
11671 cur = cur->next;
11672 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011673 ctxt->myDoc->children->children = NULL;
11674 }
11675 if (ctxt->myDoc != NULL) {
11676 xmlFreeNode(ctxt->myDoc->children);
11677 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011678 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011679 }
11680
11681 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011682 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011683 ctxt->attsDefault = NULL;
11684 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011685 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011686 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011687 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011688 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011689
11690 return(ret);
11691}
11692
Daniel Veillard29b17482004-08-16 00:39:03 +000011693/**
11694 * xmlParseInNodeContext:
11695 * @node: the context node
11696 * @data: the input string
11697 * @datalen: the input string length in bytes
11698 * @options: a combination of xmlParserOption
11699 * @lst: the return value for the set of parsed nodes
11700 *
11701 * Parse a well-balanced chunk of an XML document
11702 * within the context (DTD, namespaces, etc ...) of the given node.
11703 *
11704 * The allowed sequence for the data is a Well Balanced Chunk defined by
11705 * the content production in the XML grammar:
11706 *
11707 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11708 *
11709 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11710 * error code otherwise
11711 */
11712xmlParserErrors
11713xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11714 int options, xmlNodePtr *lst) {
11715#ifdef SAX2
11716 xmlParserCtxtPtr ctxt;
11717 xmlDocPtr doc = NULL;
11718 xmlNodePtr fake, cur;
11719 int nsnr = 0;
11720
11721 xmlParserErrors ret = XML_ERR_OK;
11722
11723 /*
11724 * check all input parameters, grab the document
11725 */
11726 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11727 return(XML_ERR_INTERNAL_ERROR);
11728 switch (node->type) {
11729 case XML_ELEMENT_NODE:
11730 case XML_ATTRIBUTE_NODE:
11731 case XML_TEXT_NODE:
11732 case XML_CDATA_SECTION_NODE:
11733 case XML_ENTITY_REF_NODE:
11734 case XML_PI_NODE:
11735 case XML_COMMENT_NODE:
11736 case XML_DOCUMENT_NODE:
11737 case XML_HTML_DOCUMENT_NODE:
11738 break;
11739 default:
11740 return(XML_ERR_INTERNAL_ERROR);
11741
11742 }
11743 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11744 (node->type != XML_DOCUMENT_NODE) &&
11745 (node->type != XML_HTML_DOCUMENT_NODE))
11746 node = node->parent;
11747 if (node == NULL)
11748 return(XML_ERR_INTERNAL_ERROR);
11749 if (node->type == XML_ELEMENT_NODE)
11750 doc = node->doc;
11751 else
11752 doc = (xmlDocPtr) node;
11753 if (doc == NULL)
11754 return(XML_ERR_INTERNAL_ERROR);
11755
11756 /*
11757 * allocate a context and set-up everything not related to the
11758 * node position in the tree
11759 */
11760 if (doc->type == XML_DOCUMENT_NODE)
11761 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11762#ifdef LIBXML_HTML_ENABLED
11763 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11764 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11765#endif
11766 else
11767 return(XML_ERR_INTERNAL_ERROR);
11768
11769 if (ctxt == NULL)
11770 return(XML_ERR_NO_MEMORY);
11771 fake = xmlNewComment(NULL);
11772 if (fake == NULL) {
11773 xmlFreeParserCtxt(ctxt);
11774 return(XML_ERR_NO_MEMORY);
11775 }
11776 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011777
11778 /*
11779 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11780 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11781 * we must wait until the last moment to free the original one.
11782 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011783 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011784 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011785 xmlDictFree(ctxt->dict);
11786 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011787 } else
11788 options |= XML_PARSE_NODICT;
11789
11790 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011791 xmlDetectSAX2(ctxt);
11792 ctxt->myDoc = doc;
11793
11794 if (node->type == XML_ELEMENT_NODE) {
11795 nodePush(ctxt, node);
11796 /*
11797 * initialize the SAX2 namespaces stack
11798 */
11799 cur = node;
11800 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11801 xmlNsPtr ns = cur->nsDef;
11802 const xmlChar *iprefix, *ihref;
11803
11804 while (ns != NULL) {
11805 if (ctxt->dict) {
11806 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11807 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11808 } else {
11809 iprefix = ns->prefix;
11810 ihref = ns->href;
11811 }
11812
11813 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11814 nsPush(ctxt, iprefix, ihref);
11815 nsnr++;
11816 }
11817 ns = ns->next;
11818 }
11819 cur = cur->parent;
11820 }
11821 ctxt->instate = XML_PARSER_CONTENT;
11822 }
11823
11824 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11825 /*
11826 * ID/IDREF registration will be done in xmlValidateElement below
11827 */
11828 ctxt->loadsubset |= XML_SKIP_IDS;
11829 }
11830
Daniel Veillard499cc922006-01-18 17:22:35 +000011831#ifdef LIBXML_HTML_ENABLED
11832 if (doc->type == XML_HTML_DOCUMENT_NODE)
11833 __htmlParseContent(ctxt);
11834 else
11835#endif
11836 xmlParseContent(ctxt);
11837
Daniel Veillard29b17482004-08-16 00:39:03 +000011838 nsPop(ctxt, nsnr);
11839 if ((RAW == '<') && (NXT(1) == '/')) {
11840 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11841 } else if (RAW != 0) {
11842 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11843 }
11844 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11845 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11846 ctxt->wellFormed = 0;
11847 }
11848
11849 if (!ctxt->wellFormed) {
11850 if (ctxt->errNo == 0)
11851 ret = XML_ERR_INTERNAL_ERROR;
11852 else
11853 ret = (xmlParserErrors)ctxt->errNo;
11854 } else {
11855 ret = XML_ERR_OK;
11856 }
11857
11858 /*
11859 * Return the newly created nodeset after unlinking it from
11860 * the pseudo sibling.
11861 */
11862
11863 cur = fake->next;
11864 fake->next = NULL;
11865 node->last = fake;
11866
11867 if (cur != NULL) {
11868 cur->prev = NULL;
11869 }
11870
11871 *lst = cur;
11872
11873 while (cur != NULL) {
11874 cur->parent = NULL;
11875 cur = cur->next;
11876 }
11877
11878 xmlUnlinkNode(fake);
11879 xmlFreeNode(fake);
11880
11881
11882 if (ret != XML_ERR_OK) {
11883 xmlFreeNodeList(*lst);
11884 *lst = NULL;
11885 }
William M. Brackc3f81342004-10-03 01:22:44 +000011886
William M. Brackb7b54de2004-10-06 16:38:01 +000011887 if (doc->dict != NULL)
11888 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011889 xmlFreeParserCtxt(ctxt);
11890
11891 return(ret);
11892#else /* !SAX2 */
11893 return(XML_ERR_INTERNAL_ERROR);
11894#endif
11895}
11896
Daniel Veillard81273902003-09-30 00:43:48 +000011897#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011898/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011899 * xmlParseBalancedChunkMemoryRecover:
11900 * @doc: the document the chunk pertains to
11901 * @sax: the SAX handler bloc (possibly NULL)
11902 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11903 * @depth: Used for loop detection, use 0
11904 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11905 * @lst: the return value for the set of parsed nodes
11906 * @recover: return nodes even if the data is broken (use 0)
11907 *
11908 *
11909 * Parse a well-balanced chunk of an XML document
11910 * called by the parser
11911 * The allowed sequence for the Well Balanced Chunk is the one defined by
11912 * the content production in the XML grammar:
11913 *
11914 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11915 *
11916 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11917 * the parser error code otherwise
11918 *
11919 * In case recover is set to 1, the nodelist will not be empty even if
11920 * the parsed chunk is not well balanced.
11921 */
11922int
11923xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11924 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11925 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011926 xmlParserCtxtPtr ctxt;
11927 xmlDocPtr newDoc;
11928 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011929 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011930 int size;
11931 int ret = 0;
11932
11933 if (depth > 40) {
11934 return(XML_ERR_ENTITY_LOOP);
11935 }
11936
11937
Daniel Veillardcda96922001-08-21 10:56:31 +000011938 if (lst != NULL)
11939 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011940 if (string == NULL)
11941 return(-1);
11942
11943 size = xmlStrlen(string);
11944
11945 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11946 if (ctxt == NULL) return(-1);
11947 ctxt->userData = ctxt;
11948 if (sax != NULL) {
11949 oldsax = ctxt->sax;
11950 ctxt->sax = sax;
11951 if (user_data != NULL)
11952 ctxt->userData = user_data;
11953 }
11954 newDoc = xmlNewDoc(BAD_CAST "1.0");
11955 if (newDoc == NULL) {
11956 xmlFreeParserCtxt(ctxt);
11957 return(-1);
11958 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011959 if ((doc != NULL) && (doc->dict != NULL)) {
11960 xmlDictFree(ctxt->dict);
11961 ctxt->dict = doc->dict;
11962 xmlDictReference(ctxt->dict);
11963 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11964 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11965 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11966 ctxt->dictNames = 1;
11967 } else {
11968 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11969 }
Owen Taylor3473f882001-02-23 17:55:21 +000011970 if (doc != NULL) {
11971 newDoc->intSubset = doc->intSubset;
11972 newDoc->extSubset = doc->extSubset;
11973 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011974 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11975 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011976 if (sax != NULL)
11977 ctxt->sax = oldsax;
11978 xmlFreeParserCtxt(ctxt);
11979 newDoc->intSubset = NULL;
11980 newDoc->extSubset = NULL;
11981 xmlFreeDoc(newDoc);
11982 return(-1);
11983 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011984 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11985 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011986 if (doc == NULL) {
11987 ctxt->myDoc = newDoc;
11988 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011989 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011990 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000011991 /* Ensure that doc has XML spec namespace */
11992 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
11993 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000011994 }
11995 ctxt->instate = XML_PARSER_CONTENT;
11996 ctxt->depth = depth;
11997
11998 /*
11999 * Doing validity checking on chunk doesn't make sense
12000 */
12001 ctxt->validate = 0;
12002 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012003 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012004
Daniel Veillardb39bc392002-10-26 19:29:51 +000012005 if ( doc != NULL ){
12006 content = doc->children;
12007 doc->children = NULL;
12008 xmlParseContent(ctxt);
12009 doc->children = content;
12010 }
12011 else {
12012 xmlParseContent(ctxt);
12013 }
Owen Taylor3473f882001-02-23 17:55:21 +000012014 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012015 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012016 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012017 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012018 }
12019 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012020 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012021 }
12022
12023 if (!ctxt->wellFormed) {
12024 if (ctxt->errNo == 0)
12025 ret = 1;
12026 else
12027 ret = ctxt->errNo;
12028 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012029 ret = 0;
12030 }
12031
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012032 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12033 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012034
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012035 /*
12036 * Return the newly created nodeset after unlinking it from
12037 * they pseudo parent.
12038 */
12039 cur = newDoc->children->children;
12040 *lst = cur;
12041 while (cur != NULL) {
12042 xmlSetTreeDoc(cur, doc);
12043 cur->parent = NULL;
12044 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012045 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012046 newDoc->children->children = NULL;
12047 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012048
Owen Taylor3473f882001-02-23 17:55:21 +000012049 if (sax != NULL)
12050 ctxt->sax = oldsax;
12051 xmlFreeParserCtxt(ctxt);
12052 newDoc->intSubset = NULL;
12053 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012054 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012055 xmlFreeDoc(newDoc);
12056
12057 return(ret);
12058}
12059
12060/**
12061 * xmlSAXParseEntity:
12062 * @sax: the SAX handler block
12063 * @filename: the filename
12064 *
12065 * parse an XML external entity out of context and build a tree.
12066 * It use the given SAX function block to handle the parsing callback.
12067 * If sax is NULL, fallback to the default DOM tree building routines.
12068 *
12069 * [78] extParsedEnt ::= TextDecl? content
12070 *
12071 * This correspond to a "Well Balanced" chunk
12072 *
12073 * Returns the resulting document tree
12074 */
12075
12076xmlDocPtr
12077xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12078 xmlDocPtr ret;
12079 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012080
12081 ctxt = xmlCreateFileParserCtxt(filename);
12082 if (ctxt == NULL) {
12083 return(NULL);
12084 }
12085 if (sax != NULL) {
12086 if (ctxt->sax != NULL)
12087 xmlFree(ctxt->sax);
12088 ctxt->sax = sax;
12089 ctxt->userData = NULL;
12090 }
12091
Owen Taylor3473f882001-02-23 17:55:21 +000012092 xmlParseExtParsedEnt(ctxt);
12093
12094 if (ctxt->wellFormed)
12095 ret = ctxt->myDoc;
12096 else {
12097 ret = NULL;
12098 xmlFreeDoc(ctxt->myDoc);
12099 ctxt->myDoc = NULL;
12100 }
12101 if (sax != NULL)
12102 ctxt->sax = NULL;
12103 xmlFreeParserCtxt(ctxt);
12104
12105 return(ret);
12106}
12107
12108/**
12109 * xmlParseEntity:
12110 * @filename: the filename
12111 *
12112 * parse an XML external entity out of context and build a tree.
12113 *
12114 * [78] extParsedEnt ::= TextDecl? content
12115 *
12116 * This correspond to a "Well Balanced" chunk
12117 *
12118 * Returns the resulting document tree
12119 */
12120
12121xmlDocPtr
12122xmlParseEntity(const char *filename) {
12123 return(xmlSAXParseEntity(NULL, filename));
12124}
Daniel Veillard81273902003-09-30 00:43:48 +000012125#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012126
12127/**
12128 * xmlCreateEntityParserCtxt:
12129 * @URL: the entity URL
12130 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012131 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012132 *
12133 * Create a parser context for an external entity
12134 * Automatic support for ZLIB/Compress compressed document is provided
12135 * by default if found at compile-time.
12136 *
12137 * Returns the new parser context or NULL
12138 */
12139xmlParserCtxtPtr
12140xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12141 const xmlChar *base) {
12142 xmlParserCtxtPtr ctxt;
12143 xmlParserInputPtr inputStream;
12144 char *directory = NULL;
12145 xmlChar *uri;
12146
12147 ctxt = xmlNewParserCtxt();
12148 if (ctxt == NULL) {
12149 return(NULL);
12150 }
12151
12152 uri = xmlBuildURI(URL, base);
12153
12154 if (uri == NULL) {
12155 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12156 if (inputStream == NULL) {
12157 xmlFreeParserCtxt(ctxt);
12158 return(NULL);
12159 }
12160
12161 inputPush(ctxt, inputStream);
12162
12163 if ((ctxt->directory == NULL) && (directory == NULL))
12164 directory = xmlParserGetDirectory((char *)URL);
12165 if ((ctxt->directory == NULL) && (directory != NULL))
12166 ctxt->directory = directory;
12167 } else {
12168 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12169 if (inputStream == NULL) {
12170 xmlFree(uri);
12171 xmlFreeParserCtxt(ctxt);
12172 return(NULL);
12173 }
12174
12175 inputPush(ctxt, inputStream);
12176
12177 if ((ctxt->directory == NULL) && (directory == NULL))
12178 directory = xmlParserGetDirectory((char *)uri);
12179 if ((ctxt->directory == NULL) && (directory != NULL))
12180 ctxt->directory = directory;
12181 xmlFree(uri);
12182 }
Owen Taylor3473f882001-02-23 17:55:21 +000012183 return(ctxt);
12184}
12185
12186/************************************************************************
12187 * *
12188 * Front ends when parsing from a file *
12189 * *
12190 ************************************************************************/
12191
12192/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012193 * xmlCreateURLParserCtxt:
12194 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012195 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012196 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012197 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012198 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012199 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012200 *
12201 * Returns the new parser context or NULL
12202 */
12203xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012204xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012205{
12206 xmlParserCtxtPtr ctxt;
12207 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012208 char *directory = NULL;
12209
Owen Taylor3473f882001-02-23 17:55:21 +000012210 ctxt = xmlNewParserCtxt();
12211 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012212 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012213 return(NULL);
12214 }
12215
Daniel Veillarddf292f72005-01-16 19:00:15 +000012216 if (options)
12217 xmlCtxtUseOptions(ctxt, options);
12218 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012219
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012220 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012221 if (inputStream == NULL) {
12222 xmlFreeParserCtxt(ctxt);
12223 return(NULL);
12224 }
12225
Owen Taylor3473f882001-02-23 17:55:21 +000012226 inputPush(ctxt, inputStream);
12227 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012228 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012229 if ((ctxt->directory == NULL) && (directory != NULL))
12230 ctxt->directory = directory;
12231
12232 return(ctxt);
12233}
12234
Daniel Veillard61b93382003-11-03 14:28:31 +000012235/**
12236 * xmlCreateFileParserCtxt:
12237 * @filename: the filename
12238 *
12239 * Create a parser context for a file content.
12240 * Automatic support for ZLIB/Compress compressed document is provided
12241 * by default if found at compile-time.
12242 *
12243 * Returns the new parser context or NULL
12244 */
12245xmlParserCtxtPtr
12246xmlCreateFileParserCtxt(const char *filename)
12247{
12248 return(xmlCreateURLParserCtxt(filename, 0));
12249}
12250
Daniel Veillard81273902003-09-30 00:43:48 +000012251#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012252/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012253 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012254 * @sax: the SAX handler block
12255 * @filename: the filename
12256 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12257 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012258 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012259 *
12260 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12261 * compressed document is provided by default if found at compile-time.
12262 * It use the given SAX function block to handle the parsing callback.
12263 * If sax is NULL, fallback to the default DOM tree building routines.
12264 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012265 * User data (void *) is stored within the parser context in the
12266 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012267 *
Owen Taylor3473f882001-02-23 17:55:21 +000012268 * Returns the resulting document tree
12269 */
12270
12271xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012272xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12273 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012274 xmlDocPtr ret;
12275 xmlParserCtxtPtr ctxt;
12276 char *directory = NULL;
12277
Daniel Veillard635ef722001-10-29 11:48:19 +000012278 xmlInitParser();
12279
Owen Taylor3473f882001-02-23 17:55:21 +000012280 ctxt = xmlCreateFileParserCtxt(filename);
12281 if (ctxt == NULL) {
12282 return(NULL);
12283 }
12284 if (sax != NULL) {
12285 if (ctxt->sax != NULL)
12286 xmlFree(ctxt->sax);
12287 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012288 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012289 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012290 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012291 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012292 }
Owen Taylor3473f882001-02-23 17:55:21 +000012293
12294 if ((ctxt->directory == NULL) && (directory == NULL))
12295 directory = xmlParserGetDirectory(filename);
12296 if ((ctxt->directory == NULL) && (directory != NULL))
12297 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12298
Daniel Veillarddad3f682002-11-17 16:47:27 +000012299 ctxt->recovery = recovery;
12300
Owen Taylor3473f882001-02-23 17:55:21 +000012301 xmlParseDocument(ctxt);
12302
William M. Brackc07329e2003-09-08 01:57:30 +000012303 if ((ctxt->wellFormed) || recovery) {
12304 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012305 if (ret != NULL) {
12306 if (ctxt->input->buf->compressed > 0)
12307 ret->compression = 9;
12308 else
12309 ret->compression = ctxt->input->buf->compressed;
12310 }
William M. Brackc07329e2003-09-08 01:57:30 +000012311 }
Owen Taylor3473f882001-02-23 17:55:21 +000012312 else {
12313 ret = NULL;
12314 xmlFreeDoc(ctxt->myDoc);
12315 ctxt->myDoc = NULL;
12316 }
12317 if (sax != NULL)
12318 ctxt->sax = NULL;
12319 xmlFreeParserCtxt(ctxt);
12320
12321 return(ret);
12322}
12323
12324/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012325 * xmlSAXParseFile:
12326 * @sax: the SAX handler block
12327 * @filename: the filename
12328 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12329 * documents
12330 *
12331 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12332 * compressed document is provided by default if found at compile-time.
12333 * It use the given SAX function block to handle the parsing callback.
12334 * If sax is NULL, fallback to the default DOM tree building routines.
12335 *
12336 * Returns the resulting document tree
12337 */
12338
12339xmlDocPtr
12340xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12341 int recovery) {
12342 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12343}
12344
12345/**
Owen Taylor3473f882001-02-23 17:55:21 +000012346 * xmlRecoverDoc:
12347 * @cur: a pointer to an array of xmlChar
12348 *
12349 * parse an XML in-memory document and build a tree.
12350 * In the case the document is not Well Formed, a tree is built anyway
12351 *
12352 * Returns the resulting document tree
12353 */
12354
12355xmlDocPtr
12356xmlRecoverDoc(xmlChar *cur) {
12357 return(xmlSAXParseDoc(NULL, cur, 1));
12358}
12359
12360/**
12361 * xmlParseFile:
12362 * @filename: the filename
12363 *
12364 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12365 * compressed document is provided by default if found at compile-time.
12366 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012367 * Returns the resulting document tree if the file was wellformed,
12368 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012369 */
12370
12371xmlDocPtr
12372xmlParseFile(const char *filename) {
12373 return(xmlSAXParseFile(NULL, filename, 0));
12374}
12375
12376/**
12377 * xmlRecoverFile:
12378 * @filename: the filename
12379 *
12380 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12381 * compressed document is provided by default if found at compile-time.
12382 * In the case the document is not Well Formed, a tree is built anyway
12383 *
12384 * Returns the resulting document tree
12385 */
12386
12387xmlDocPtr
12388xmlRecoverFile(const char *filename) {
12389 return(xmlSAXParseFile(NULL, filename, 1));
12390}
12391
12392
12393/**
12394 * xmlSetupParserForBuffer:
12395 * @ctxt: an XML parser context
12396 * @buffer: a xmlChar * buffer
12397 * @filename: a file name
12398 *
12399 * Setup the parser context to parse a new buffer; Clears any prior
12400 * contents from the parser context. The buffer parameter must not be
12401 * NULL, but the filename parameter can be
12402 */
12403void
12404xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12405 const char* filename)
12406{
12407 xmlParserInputPtr input;
12408
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012409 if ((ctxt == NULL) || (buffer == NULL))
12410 return;
12411
Owen Taylor3473f882001-02-23 17:55:21 +000012412 input = xmlNewInputStream(ctxt);
12413 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012414 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012415 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012416 return;
12417 }
12418
12419 xmlClearParserCtxt(ctxt);
12420 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012421 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012422 input->base = buffer;
12423 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012424 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012425 inputPush(ctxt, input);
12426}
12427
12428/**
12429 * xmlSAXUserParseFile:
12430 * @sax: a SAX handler
12431 * @user_data: The user data returned on SAX callbacks
12432 * @filename: a file name
12433 *
12434 * parse an XML file and call the given SAX handler routines.
12435 * Automatic support for ZLIB/Compress compressed document is provided
12436 *
12437 * Returns 0 in case of success or a error number otherwise
12438 */
12439int
12440xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12441 const char *filename) {
12442 int ret = 0;
12443 xmlParserCtxtPtr ctxt;
12444
12445 ctxt = xmlCreateFileParserCtxt(filename);
12446 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012447#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012448 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012449#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012450 xmlFree(ctxt->sax);
12451 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012452 xmlDetectSAX2(ctxt);
12453
Owen Taylor3473f882001-02-23 17:55:21 +000012454 if (user_data != NULL)
12455 ctxt->userData = user_data;
12456
12457 xmlParseDocument(ctxt);
12458
12459 if (ctxt->wellFormed)
12460 ret = 0;
12461 else {
12462 if (ctxt->errNo != 0)
12463 ret = ctxt->errNo;
12464 else
12465 ret = -1;
12466 }
12467 if (sax != NULL)
12468 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012469 if (ctxt->myDoc != NULL) {
12470 xmlFreeDoc(ctxt->myDoc);
12471 ctxt->myDoc = NULL;
12472 }
Owen Taylor3473f882001-02-23 17:55:21 +000012473 xmlFreeParserCtxt(ctxt);
12474
12475 return ret;
12476}
Daniel Veillard81273902003-09-30 00:43:48 +000012477#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012478
12479/************************************************************************
12480 * *
12481 * Front ends when parsing from memory *
12482 * *
12483 ************************************************************************/
12484
12485/**
12486 * xmlCreateMemoryParserCtxt:
12487 * @buffer: a pointer to a char array
12488 * @size: the size of the array
12489 *
12490 * Create a parser context for an XML in-memory document.
12491 *
12492 * Returns the new parser context or NULL
12493 */
12494xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012495xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012496 xmlParserCtxtPtr ctxt;
12497 xmlParserInputPtr input;
12498 xmlParserInputBufferPtr buf;
12499
12500 if (buffer == NULL)
12501 return(NULL);
12502 if (size <= 0)
12503 return(NULL);
12504
12505 ctxt = xmlNewParserCtxt();
12506 if (ctxt == NULL)
12507 return(NULL);
12508
Daniel Veillard53350552003-09-18 13:35:51 +000012509 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012510 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012511 if (buf == NULL) {
12512 xmlFreeParserCtxt(ctxt);
12513 return(NULL);
12514 }
Owen Taylor3473f882001-02-23 17:55:21 +000012515
12516 input = xmlNewInputStream(ctxt);
12517 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012518 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012519 xmlFreeParserCtxt(ctxt);
12520 return(NULL);
12521 }
12522
12523 input->filename = NULL;
12524 input->buf = buf;
12525 input->base = input->buf->buffer->content;
12526 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012527 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012528
12529 inputPush(ctxt, input);
12530 return(ctxt);
12531}
12532
Daniel Veillard81273902003-09-30 00:43:48 +000012533#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012534/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012535 * xmlSAXParseMemoryWithData:
12536 * @sax: the SAX handler block
12537 * @buffer: an pointer to a char array
12538 * @size: the size of the array
12539 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12540 * documents
12541 * @data: the userdata
12542 *
12543 * parse an XML in-memory block and use the given SAX function block
12544 * to handle the parsing callback. If sax is NULL, fallback to the default
12545 * DOM tree building routines.
12546 *
12547 * User data (void *) is stored within the parser context in the
12548 * context's _private member, so it is available nearly everywhere in libxml
12549 *
12550 * Returns the resulting document tree
12551 */
12552
12553xmlDocPtr
12554xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12555 int size, int recovery, void *data) {
12556 xmlDocPtr ret;
12557 xmlParserCtxtPtr ctxt;
12558
12559 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12560 if (ctxt == NULL) return(NULL);
12561 if (sax != NULL) {
12562 if (ctxt->sax != NULL)
12563 xmlFree(ctxt->sax);
12564 ctxt->sax = sax;
12565 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012566 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012567 if (data!=NULL) {
12568 ctxt->_private=data;
12569 }
12570
Daniel Veillardadba5f12003-04-04 16:09:01 +000012571 ctxt->recovery = recovery;
12572
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012573 xmlParseDocument(ctxt);
12574
12575 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12576 else {
12577 ret = NULL;
12578 xmlFreeDoc(ctxt->myDoc);
12579 ctxt->myDoc = NULL;
12580 }
12581 if (sax != NULL)
12582 ctxt->sax = NULL;
12583 xmlFreeParserCtxt(ctxt);
12584
12585 return(ret);
12586}
12587
12588/**
Owen Taylor3473f882001-02-23 17:55:21 +000012589 * xmlSAXParseMemory:
12590 * @sax: the SAX handler block
12591 * @buffer: an pointer to a char array
12592 * @size: the size of the array
12593 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12594 * documents
12595 *
12596 * parse an XML in-memory block and use the given SAX function block
12597 * to handle the parsing callback. If sax is NULL, fallback to the default
12598 * DOM tree building routines.
12599 *
12600 * Returns the resulting document tree
12601 */
12602xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012603xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12604 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012605 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012606}
12607
12608/**
12609 * xmlParseMemory:
12610 * @buffer: an pointer to a char array
12611 * @size: the size of the array
12612 *
12613 * parse an XML in-memory block and build a tree.
12614 *
12615 * Returns the resulting document tree
12616 */
12617
Daniel Veillard50822cb2001-07-26 20:05:51 +000012618xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012619 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12620}
12621
12622/**
12623 * xmlRecoverMemory:
12624 * @buffer: an pointer to a char array
12625 * @size: the size of the array
12626 *
12627 * parse an XML in-memory block and build a tree.
12628 * In the case the document is not Well Formed, a tree is built anyway
12629 *
12630 * Returns the resulting document tree
12631 */
12632
Daniel Veillard50822cb2001-07-26 20:05:51 +000012633xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012634 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12635}
12636
12637/**
12638 * xmlSAXUserParseMemory:
12639 * @sax: a SAX handler
12640 * @user_data: The user data returned on SAX callbacks
12641 * @buffer: an in-memory XML document input
12642 * @size: the length of the XML document in bytes
12643 *
12644 * A better SAX parsing routine.
12645 * parse an XML in-memory buffer and call the given SAX handler routines.
12646 *
12647 * Returns 0 in case of success or a error number otherwise
12648 */
12649int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012650 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012651 int ret = 0;
12652 xmlParserCtxtPtr ctxt;
12653 xmlSAXHandlerPtr oldsax = NULL;
12654
Daniel Veillard9e923512002-08-14 08:48:52 +000012655 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012656 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12657 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012658 oldsax = ctxt->sax;
12659 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012660 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012661 if (user_data != NULL)
12662 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012663
12664 xmlParseDocument(ctxt);
12665
12666 if (ctxt->wellFormed)
12667 ret = 0;
12668 else {
12669 if (ctxt->errNo != 0)
12670 ret = ctxt->errNo;
12671 else
12672 ret = -1;
12673 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012674 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012675 if (ctxt->myDoc != NULL) {
12676 xmlFreeDoc(ctxt->myDoc);
12677 ctxt->myDoc = NULL;
12678 }
Owen Taylor3473f882001-02-23 17:55:21 +000012679 xmlFreeParserCtxt(ctxt);
12680
12681 return ret;
12682}
Daniel Veillard81273902003-09-30 00:43:48 +000012683#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012684
12685/**
12686 * xmlCreateDocParserCtxt:
12687 * @cur: a pointer to an array of xmlChar
12688 *
12689 * Creates a parser context for an XML in-memory document.
12690 *
12691 * Returns the new parser context or NULL
12692 */
12693xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012694xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012695 int len;
12696
12697 if (cur == NULL)
12698 return(NULL);
12699 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012700 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012701}
12702
Daniel Veillard81273902003-09-30 00:43:48 +000012703#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012704/**
12705 * xmlSAXParseDoc:
12706 * @sax: the SAX handler block
12707 * @cur: a pointer to an array of xmlChar
12708 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12709 * documents
12710 *
12711 * parse an XML in-memory document and build a tree.
12712 * It use the given SAX function block to handle the parsing callback.
12713 * If sax is NULL, fallback to the default DOM tree building routines.
12714 *
12715 * Returns the resulting document tree
12716 */
12717
12718xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012719xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012720 xmlDocPtr ret;
12721 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012722 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012723
Daniel Veillard38936062004-11-04 17:45:11 +000012724 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012725
12726
12727 ctxt = xmlCreateDocParserCtxt(cur);
12728 if (ctxt == NULL) return(NULL);
12729 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012730 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012731 ctxt->sax = sax;
12732 ctxt->userData = NULL;
12733 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012734 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012735
12736 xmlParseDocument(ctxt);
12737 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12738 else {
12739 ret = NULL;
12740 xmlFreeDoc(ctxt->myDoc);
12741 ctxt->myDoc = NULL;
12742 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012743 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012744 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012745 xmlFreeParserCtxt(ctxt);
12746
12747 return(ret);
12748}
12749
12750/**
12751 * xmlParseDoc:
12752 * @cur: a pointer to an array of xmlChar
12753 *
12754 * parse an XML in-memory document and build a tree.
12755 *
12756 * Returns the resulting document tree
12757 */
12758
12759xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012760xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012761 return(xmlSAXParseDoc(NULL, cur, 0));
12762}
Daniel Veillard81273902003-09-30 00:43:48 +000012763#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012764
Daniel Veillard81273902003-09-30 00:43:48 +000012765#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012766/************************************************************************
12767 * *
12768 * Specific function to keep track of entities references *
12769 * and used by the XSLT debugger *
12770 * *
12771 ************************************************************************/
12772
/*
 * Callback invoked by xmlAddEntityReference(); installed through
 * xmlSetEntityReferenceFunc(). NULL means no notification is made.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12774
12775/**
12776 * xmlAddEntityReference:
12777 * @ent : A valid entity
12778 * @firstNode : A valid first node for children of entity
12779 * @lastNode : A valid last node of children entity
12780 *
12781 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12782 */
12783static void
12784xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12785 xmlNodePtr lastNode)
12786{
12787 if (xmlEntityRefFunc != NULL) {
12788 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12789 }
12790}
12791
12792
12793/**
12794 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012795 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012796 *
12797 * Set the function to call call back when a xml reference has been made
12798 */
12799void
12800xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12801{
12802 xmlEntityRefFunc = func;
12803}
Daniel Veillard81273902003-09-30 00:43:48 +000012804#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012805
12806/************************************************************************
12807 * *
12808 * Miscellaneous *
12809 * *
12810 ************************************************************************/
12811
12812#ifdef LIBXML_XPATH_ENABLED
12813#include <libxml/xpath.h>
12814#endif
12815
Daniel Veillardffa3c742005-07-21 13:24:09 +000012816extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012817static int xmlParserInitialized = 0;
12818
12819/**
12820 * xmlInitParser:
12821 *
12822 * Initialization function for the XML parser.
12823 * This is not reentrant. Call once before processing in case of
12824 * use in multithreaded programs.
12825 */
12826
12827void
12828xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012829 if (xmlParserInitialized != 0)
12830 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012831
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012832 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12833 (xmlGenericError == NULL))
12834 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012835 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012836 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012837 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012838 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012839 xmlDefaultSAXHandlerInit();
12840 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012841#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012842 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012843#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012844#ifdef LIBXML_HTML_ENABLED
12845 htmlInitAutoClose();
12846 htmlDefaultSAXHandlerInit();
12847#endif
12848#ifdef LIBXML_XPATH_ENABLED
12849 xmlXPathInit();
12850#endif
12851 xmlParserInitialized = 1;
12852}
12853
12854/**
12855 * xmlCleanupParser:
12856 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012857 * Cleanup function for the XML library. It tries to reclaim all
12858 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012859 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012860 * function should not prevent reusing the library but one should
12861 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012862 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012863 */
12864
12865void
12866xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012867 if (!xmlParserInitialized)
12868 return;
12869
Owen Taylor3473f882001-02-23 17:55:21 +000012870 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012871#ifdef LIBXML_CATALOG_ENABLED
12872 xmlCatalogCleanup();
12873#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012874 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012875 xmlCleanupInputCallbacks();
12876#ifdef LIBXML_OUTPUT_ENABLED
12877 xmlCleanupOutputCallbacks();
12878#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012879#ifdef LIBXML_SCHEMAS_ENABLED
12880 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012881 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012882#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012883 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012884 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012885 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012886 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012887 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012888}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012889
12890/************************************************************************
12891 * *
12892 * New set (2.6.0) of simpler and more flexible APIs *
12893 * *
12894 ************************************************************************/
12895
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.  A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as
 * a single statement; callers must terminate it with a semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
12907
12908/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012909 * xmlCtxtReset:
12910 * @ctxt: an XML parser context
12911 *
12912 * Reset a parser context
12913 */
12914void
12915xmlCtxtReset(xmlParserCtxtPtr ctxt)
12916{
12917 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012918 xmlDictPtr dict;
12919
12920 if (ctxt == NULL)
12921 return;
12922
12923 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012924
12925 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12926 xmlFreeInputStream(input);
12927 }
12928 ctxt->inputNr = 0;
12929 ctxt->input = NULL;
12930
12931 ctxt->spaceNr = 0;
12932 ctxt->spaceTab[0] = -1;
12933 ctxt->space = &ctxt->spaceTab[0];
12934
12935
12936 ctxt->nodeNr = 0;
12937 ctxt->node = NULL;
12938
12939 ctxt->nameNr = 0;
12940 ctxt->name = NULL;
12941
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012942 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012943 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012944 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012945 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012946 DICT_FREE(ctxt->directory);
12947 ctxt->directory = NULL;
12948 DICT_FREE(ctxt->extSubURI);
12949 ctxt->extSubURI = NULL;
12950 DICT_FREE(ctxt->extSubSystem);
12951 ctxt->extSubSystem = NULL;
12952 if (ctxt->myDoc != NULL)
12953 xmlFreeDoc(ctxt->myDoc);
12954 ctxt->myDoc = NULL;
12955
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012956 ctxt->standalone = -1;
12957 ctxt->hasExternalSubset = 0;
12958 ctxt->hasPErefs = 0;
12959 ctxt->html = 0;
12960 ctxt->external = 0;
12961 ctxt->instate = XML_PARSER_START;
12962 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012963
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012964 ctxt->wellFormed = 1;
12965 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012966 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012967 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012968#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012969 ctxt->vctxt.userData = ctxt;
12970 ctxt->vctxt.error = xmlParserValidityError;
12971 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012972#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012973 ctxt->record_info = 0;
12974 ctxt->nbChars = 0;
12975 ctxt->checkIndex = 0;
12976 ctxt->inSubset = 0;
12977 ctxt->errNo = XML_ERR_OK;
12978 ctxt->depth = 0;
12979 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12980 ctxt->catalogs = NULL;
12981 xmlInitNodeInfoSeq(&ctxt->node_seq);
12982
12983 if (ctxt->attsDefault != NULL) {
12984 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12985 ctxt->attsDefault = NULL;
12986 }
12987 if (ctxt->attsSpecial != NULL) {
12988 xmlHashFree(ctxt->attsSpecial, NULL);
12989 ctxt->attsSpecial = NULL;
12990 }
12991
Daniel Veillard4432df22003-09-28 18:58:27 +000012992#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012993 if (ctxt->catalogs != NULL)
12994 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012995#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012996 if (ctxt->lastError.code != XML_ERR_OK)
12997 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012998}
12999
13000/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013001 * xmlCtxtResetPush:
13002 * @ctxt: an XML parser context
13003 * @chunk: a pointer to an array of chars
13004 * @size: number of chars in the array
13005 * @filename: an optional file name or URI
13006 * @encoding: the document encoding, or NULL
13007 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013008 * Reset a push parser context
13009 *
13010 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013011 */
13012int
13013xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13014 int size, const char *filename, const char *encoding)
13015{
13016 xmlParserInputPtr inputStream;
13017 xmlParserInputBufferPtr buf;
13018 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13019
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013020 if (ctxt == NULL)
13021 return(1);
13022
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013023 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13024 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13025
13026 buf = xmlAllocParserInputBuffer(enc);
13027 if (buf == NULL)
13028 return(1);
13029
13030 if (ctxt == NULL) {
13031 xmlFreeParserInputBuffer(buf);
13032 return(1);
13033 }
13034
13035 xmlCtxtReset(ctxt);
13036
13037 if (ctxt->pushTab == NULL) {
13038 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13039 sizeof(xmlChar *));
13040 if (ctxt->pushTab == NULL) {
13041 xmlErrMemory(ctxt, NULL);
13042 xmlFreeParserInputBuffer(buf);
13043 return(1);
13044 }
13045 }
13046
13047 if (filename == NULL) {
13048 ctxt->directory = NULL;
13049 } else {
13050 ctxt->directory = xmlParserGetDirectory(filename);
13051 }
13052
13053 inputStream = xmlNewInputStream(ctxt);
13054 if (inputStream == NULL) {
13055 xmlFreeParserInputBuffer(buf);
13056 return(1);
13057 }
13058
13059 if (filename == NULL)
13060 inputStream->filename = NULL;
13061 else
13062 inputStream->filename = (char *)
13063 xmlCanonicPath((const xmlChar *) filename);
13064 inputStream->buf = buf;
13065 inputStream->base = inputStream->buf->buffer->content;
13066 inputStream->cur = inputStream->buf->buffer->content;
13067 inputStream->end =
13068 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13069
13070 inputPush(ctxt, inputStream);
13071
13072 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13073 (ctxt->input->buf != NULL)) {
13074 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13075 int cur = ctxt->input->cur - ctxt->input->base;
13076
13077 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13078
13079 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13080 ctxt->input->cur = ctxt->input->base + cur;
13081 ctxt->input->end =
13082 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13083 use];
13084#ifdef DEBUG_PUSH
13085 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13086#endif
13087 }
13088
13089 if (encoding != NULL) {
13090 xmlCharEncodingHandlerPtr hdlr;
13091
13092 hdlr = xmlFindCharEncodingHandler(encoding);
13093 if (hdlr != NULL) {
13094 xmlSwitchToEncoding(ctxt, hdlr);
13095 } else {
13096 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13097 "Unsupported encoding %s\n", BAD_CAST encoding);
13098 }
13099 } else if (enc != XML_CHAR_ENCODING_NONE) {
13100 xmlSwitchEncoding(ctxt, enc);
13101 }
13102
13103 return(0);
13104}
13105
13106/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013107 * xmlCtxtUseOptions:
13108 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013109 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013110 *
13111 * Applies the options to the parser context
13112 *
13113 * Returns 0 in case of success, the set of unknown or unimplemented options
13114 * in case of error.
13115 */
13116int
13117xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13118{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013119 if (ctxt == NULL)
13120 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013121 if (options & XML_PARSE_RECOVER) {
13122 ctxt->recovery = 1;
13123 options -= XML_PARSE_RECOVER;
13124 } else
13125 ctxt->recovery = 0;
13126 if (options & XML_PARSE_DTDLOAD) {
13127 ctxt->loadsubset = XML_DETECT_IDS;
13128 options -= XML_PARSE_DTDLOAD;
13129 } else
13130 ctxt->loadsubset = 0;
13131 if (options & XML_PARSE_DTDATTR) {
13132 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13133 options -= XML_PARSE_DTDATTR;
13134 }
13135 if (options & XML_PARSE_NOENT) {
13136 ctxt->replaceEntities = 1;
13137 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13138 options -= XML_PARSE_NOENT;
13139 } else
13140 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013141 if (options & XML_PARSE_PEDANTIC) {
13142 ctxt->pedantic = 1;
13143 options -= XML_PARSE_PEDANTIC;
13144 } else
13145 ctxt->pedantic = 0;
13146 if (options & XML_PARSE_NOBLANKS) {
13147 ctxt->keepBlanks = 0;
13148 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13149 options -= XML_PARSE_NOBLANKS;
13150 } else
13151 ctxt->keepBlanks = 1;
13152 if (options & XML_PARSE_DTDVALID) {
13153 ctxt->validate = 1;
13154 if (options & XML_PARSE_NOWARNING)
13155 ctxt->vctxt.warning = NULL;
13156 if (options & XML_PARSE_NOERROR)
13157 ctxt->vctxt.error = NULL;
13158 options -= XML_PARSE_DTDVALID;
13159 } else
13160 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013161 if (options & XML_PARSE_NOWARNING) {
13162 ctxt->sax->warning = NULL;
13163 options -= XML_PARSE_NOWARNING;
13164 }
13165 if (options & XML_PARSE_NOERROR) {
13166 ctxt->sax->error = NULL;
13167 ctxt->sax->fatalError = NULL;
13168 options -= XML_PARSE_NOERROR;
13169 }
Daniel Veillard81273902003-09-30 00:43:48 +000013170#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013171 if (options & XML_PARSE_SAX1) {
13172 ctxt->sax->startElement = xmlSAX2StartElement;
13173 ctxt->sax->endElement = xmlSAX2EndElement;
13174 ctxt->sax->startElementNs = NULL;
13175 ctxt->sax->endElementNs = NULL;
13176 ctxt->sax->initialized = 1;
13177 options -= XML_PARSE_SAX1;
13178 }
Daniel Veillard81273902003-09-30 00:43:48 +000013179#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013180 if (options & XML_PARSE_NODICT) {
13181 ctxt->dictNames = 0;
13182 options -= XML_PARSE_NODICT;
13183 } else {
13184 ctxt->dictNames = 1;
13185 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013186 if (options & XML_PARSE_NOCDATA) {
13187 ctxt->sax->cdataBlock = NULL;
13188 options -= XML_PARSE_NOCDATA;
13189 }
13190 if (options & XML_PARSE_NSCLEAN) {
13191 ctxt->options |= XML_PARSE_NSCLEAN;
13192 options -= XML_PARSE_NSCLEAN;
13193 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013194 if (options & XML_PARSE_NONET) {
13195 ctxt->options |= XML_PARSE_NONET;
13196 options -= XML_PARSE_NONET;
13197 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013198 if (options & XML_PARSE_COMPACT) {
13199 ctxt->options |= XML_PARSE_COMPACT;
13200 options -= XML_PARSE_COMPACT;
13201 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013202 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013203 return (options);
13204}
13205
13206/**
13207 * xmlDoRead:
13208 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013209 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013210 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013211 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013212 * @reuse: keep the context for reuse
13213 *
13214 * Common front-end for the xmlRead functions
13215 *
13216 * Returns the resulting document tree or NULL
13217 */
13218static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013219xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13220 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013221{
13222 xmlDocPtr ret;
13223
13224 xmlCtxtUseOptions(ctxt, options);
13225 if (encoding != NULL) {
13226 xmlCharEncodingHandlerPtr hdlr;
13227
13228 hdlr = xmlFindCharEncodingHandler(encoding);
13229 if (hdlr != NULL)
13230 xmlSwitchToEncoding(ctxt, hdlr);
13231 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013232 if ((URL != NULL) && (ctxt->input != NULL) &&
13233 (ctxt->input->filename == NULL))
13234 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013235 xmlParseDocument(ctxt);
13236 if ((ctxt->wellFormed) || ctxt->recovery)
13237 ret = ctxt->myDoc;
13238 else {
13239 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013240 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013241 xmlFreeDoc(ctxt->myDoc);
13242 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013243 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013244 ctxt->myDoc = NULL;
13245 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013247 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013248
13249 return (ret);
13250}
13251
13252/**
13253 * xmlReadDoc:
13254 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013255 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013256 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013257 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013258 *
13259 * parse an XML in-memory document and build a tree.
13260 *
13261 * Returns the resulting document tree
13262 */
13263xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013264xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013265{
13266 xmlParserCtxtPtr ctxt;
13267
13268 if (cur == NULL)
13269 return (NULL);
13270
13271 ctxt = xmlCreateDocParserCtxt(cur);
13272 if (ctxt == NULL)
13273 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013274 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013275}
13276
13277/**
13278 * xmlReadFile:
13279 * @filename: a file or URL
13280 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013281 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013282 *
13283 * parse an XML file from the filesystem or the network.
13284 *
13285 * Returns the resulting document tree
13286 */
13287xmlDocPtr
13288xmlReadFile(const char *filename, const char *encoding, int options)
13289{
13290 xmlParserCtxtPtr ctxt;
13291
Daniel Veillard61b93382003-11-03 14:28:31 +000013292 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013293 if (ctxt == NULL)
13294 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013295 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013296}
13297
13298/**
13299 * xmlReadMemory:
13300 * @buffer: a pointer to a char array
13301 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013302 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013303 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013304 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013305 *
13306 * parse an XML in-memory document and build a tree.
13307 *
13308 * Returns the resulting document tree
13309 */
13310xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013311xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013312{
13313 xmlParserCtxtPtr ctxt;
13314
13315 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13316 if (ctxt == NULL)
13317 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013318 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013319}
13320
13321/**
13322 * xmlReadFd:
13323 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013324 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013325 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013326 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327 *
13328 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013329 * NOTE that the file descriptor will not be closed when the
13330 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013331 *
13332 * Returns the resulting document tree
13333 */
13334xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013335xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013336{
13337 xmlParserCtxtPtr ctxt;
13338 xmlParserInputBufferPtr input;
13339 xmlParserInputPtr stream;
13340
13341 if (fd < 0)
13342 return (NULL);
13343
13344 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13345 if (input == NULL)
13346 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013347 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013348 ctxt = xmlNewParserCtxt();
13349 if (ctxt == NULL) {
13350 xmlFreeParserInputBuffer(input);
13351 return (NULL);
13352 }
13353 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13354 if (stream == NULL) {
13355 xmlFreeParserInputBuffer(input);
13356 xmlFreeParserCtxt(ctxt);
13357 return (NULL);
13358 }
13359 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013360 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013361}
13362
13363/**
13364 * xmlReadIO:
13365 * @ioread: an I/O read function
13366 * @ioclose: an I/O close function
13367 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013368 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013369 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013370 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013371 *
13372 * parse an XML document from I/O functions and source and build a tree.
13373 *
13374 * Returns the resulting document tree
13375 */
13376xmlDocPtr
13377xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013378 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013379{
13380 xmlParserCtxtPtr ctxt;
13381 xmlParserInputBufferPtr input;
13382 xmlParserInputPtr stream;
13383
13384 if (ioread == NULL)
13385 return (NULL);
13386
13387 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13388 XML_CHAR_ENCODING_NONE);
13389 if (input == NULL)
13390 return (NULL);
13391 ctxt = xmlNewParserCtxt();
13392 if (ctxt == NULL) {
13393 xmlFreeParserInputBuffer(input);
13394 return (NULL);
13395 }
13396 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13397 if (stream == NULL) {
13398 xmlFreeParserInputBuffer(input);
13399 xmlFreeParserCtxt(ctxt);
13400 return (NULL);
13401 }
13402 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013403 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013404}
13405
13406/**
13407 * xmlCtxtReadDoc:
13408 * @ctxt: an XML parser context
13409 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013410 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013411 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013412 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013413 *
13414 * parse an XML in-memory document and build a tree.
13415 * This reuses the existing @ctxt parser context
13416 *
13417 * Returns the resulting document tree
13418 */
13419xmlDocPtr
13420xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013421 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013422{
13423 xmlParserInputPtr stream;
13424
13425 if (cur == NULL)
13426 return (NULL);
13427 if (ctxt == NULL)
13428 return (NULL);
13429
13430 xmlCtxtReset(ctxt);
13431
13432 stream = xmlNewStringInputStream(ctxt, cur);
13433 if (stream == NULL) {
13434 return (NULL);
13435 }
13436 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013437 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013438}
13439
13440/**
13441 * xmlCtxtReadFile:
13442 * @ctxt: an XML parser context
13443 * @filename: a file or URL
13444 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013445 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013446 *
13447 * parse an XML file from the filesystem or the network.
13448 * This reuses the existing @ctxt parser context
13449 *
13450 * Returns the resulting document tree
13451 */
13452xmlDocPtr
13453xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13454 const char *encoding, int options)
13455{
13456 xmlParserInputPtr stream;
13457
13458 if (filename == NULL)
13459 return (NULL);
13460 if (ctxt == NULL)
13461 return (NULL);
13462
13463 xmlCtxtReset(ctxt);
13464
Daniel Veillard29614c72004-11-26 10:47:26 +000013465 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013466 if (stream == NULL) {
13467 return (NULL);
13468 }
13469 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013470 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013471}
13472
13473/**
13474 * xmlCtxtReadMemory:
13475 * @ctxt: an XML parser context
13476 * @buffer: a pointer to a char array
13477 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013478 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013479 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013480 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013481 *
13482 * parse an XML in-memory document and build a tree.
13483 * This reuses the existing @ctxt parser context
13484 *
13485 * Returns the resulting document tree
13486 */
13487xmlDocPtr
13488xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013489 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013490{
13491 xmlParserInputBufferPtr input;
13492 xmlParserInputPtr stream;
13493
13494 if (ctxt == NULL)
13495 return (NULL);
13496 if (buffer == NULL)
13497 return (NULL);
13498
13499 xmlCtxtReset(ctxt);
13500
13501 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13502 if (input == NULL) {
13503 return(NULL);
13504 }
13505
13506 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13507 if (stream == NULL) {
13508 xmlFreeParserInputBuffer(input);
13509 return(NULL);
13510 }
13511
13512 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013513 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013514}
13515
13516/**
13517 * xmlCtxtReadFd:
13518 * @ctxt: an XML parser context
13519 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013520 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013521 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013522 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013523 *
13524 * parse an XML from a file descriptor and build a tree.
13525 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013526 * NOTE that the file descriptor will not be closed when the
13527 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013528 *
13529 * Returns the resulting document tree
13530 */
13531xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013532xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13533 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013534{
13535 xmlParserInputBufferPtr input;
13536 xmlParserInputPtr stream;
13537
13538 if (fd < 0)
13539 return (NULL);
13540 if (ctxt == NULL)
13541 return (NULL);
13542
13543 xmlCtxtReset(ctxt);
13544
13545
13546 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13547 if (input == NULL)
13548 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013549 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013550 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13551 if (stream == NULL) {
13552 xmlFreeParserInputBuffer(input);
13553 return (NULL);
13554 }
13555 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013556 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013557}
13558
13559/**
13560 * xmlCtxtReadIO:
13561 * @ctxt: an XML parser context
13562 * @ioread: an I/O read function
13563 * @ioclose: an I/O close function
13564 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013565 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013566 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013567 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013568 *
13569 * parse an XML document from I/O functions and source and build a tree.
13570 * This reuses the existing @ctxt parser context
13571 *
13572 * Returns the resulting document tree
13573 */
13574xmlDocPtr
13575xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13576 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013577 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013578 const char *encoding, int options)
13579{
13580 xmlParserInputBufferPtr input;
13581 xmlParserInputPtr stream;
13582
13583 if (ioread == NULL)
13584 return (NULL);
13585 if (ctxt == NULL)
13586 return (NULL);
13587
13588 xmlCtxtReset(ctxt);
13589
13590 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13591 XML_CHAR_ENCODING_NONE);
13592 if (input == NULL)
13593 return (NULL);
13594 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13595 if (stream == NULL) {
13596 xmlFreeParserInputBuffer(input);
13597 return (NULL);
13598 }
13599 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013600 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013601}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013602
13603#define bottom_parser
13604#include "elfgcchack.h"