blob: bd445855c23c5b51026f923465ed18934f4b3dcc [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * agrees to process. It is a safety boundary, not an intrinsic
 * limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/*
 * File-local configuration macros.
 */

/* Marks the SAX2 code paths as enabled for this translation unit. */
#define SAX2 1

/* Named buffer sizes; their users lie outside this section of the file. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string, as an xmlChar string, for "SAX compatibility mode". */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * table slots are read-only, hence the double const qualification.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the feature exists, and zero (0) if the
623 * feature does not exist or an unknown feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1462 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
1467 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1468 * This should be used internally by the parser
1469 * only to compare to ASCII values otherwise it would break when
1470 * running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1474 * to compare on ASCII based substring.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 *
 *   RAW, CUR  the xmlChar at the current position
 *   NXT(val)  the xmlChar val positions after the current one
 *   CUR_PTR   the current position pointer itself
 */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
1496
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn. The bytes are compared as unsigned char values and the
 * comparison stops at the first mismatch, so no byte past a mismatch
 * is read.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (pointer arithmetic, conditional expressions, ...) are taken
 * as a whole; s may be evaluated up to n times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): move the current input val xmlChars forward, adding val to
 * the character and column counters (the line counter is not touched).
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds 0 and the
 * input cannot be grown the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1522
/*
 * SKIPL(val): move the current input val xmlChars forward one at a time,
 * keeping the line and column counters in step: a '\n' starts a new line
 * at column 1, any other xmlChar advances the column.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds 0 and the
 * input cannot be grown the input is popped.
 *
 * val is parenthesized so that an expression argument is used as a whole
 * for the loop bound; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* SKIP_BLANKS and NEXT are shorthands for the context based routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the character and column
 * counters (the line counter is not touched), and try to grow the input
 * when the new position holds 0.
 * The expansion is a brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
        ctxt->nbChars++;						\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (ctxt->input->cur[0] == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1572
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' at the current position starts a new line at column 1, anything
 * else advances the column by one. A '%' found at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (ctxt->input->cur[0] == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else {								\
        ctxt->input->col++;						\
    }									\
    ctxt->input->cur += l;						\
    if (ctxt->input->cur[0] == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)
1580
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): current character of the context input,
 * respectively of the string s, as returned by xmlCurrentChar() and
 * xmlStringCurrentChar(); the address of l is passed to those routines.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append the character v to the buffer b at index
 * i and advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is serialized with xmlCopyCharMultiByte().
 *
 * All parameters are parenthesized so that expression arguments are used
 * as a whole. The expansion is an if/else statement without its final
 * ';', which the call site has to provide.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
/************************************************************************
 *									*
 *		Utility functions to handle entities			*
 *									*
 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * Macro used to grow the current buffer: the companion variable
 * <buffer>_size is doubled and the buffer is reallocated to that size.
 * If the reallocation fails, control jumps to the mem_error label of the
 * enclosing function with buffer still pointing to the old block.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *)							\
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002499 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002500 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002512 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002513 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002515 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002516 }
Owen Taylor3473f882001-02-23 17:55:21 +00002517 len = 0;
2518
Daniel Veillardbb284f42002-10-16 18:02:47 +00002519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002532 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002533 }
2534 }
2535 cur++;
2536
Owen Taylor3473f882001-02-23 17:55:21 +00002537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002550 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 xmlChar *tmp;
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002560 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 return(NULL);
2565 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002566 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
2584/************************************************************************
2585 * *
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2588 * *
2589 ************************************************************************/
2590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002593 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002594
Owen Taylor3473f882001-02-23 17:55:21 +00002595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002606 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002612xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002613 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002614 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002615 int count = 0;
2616
2617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002633 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002636 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002641 return(ret);
2642 }
2643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002670 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 }
William M. Brack76e95df2003-10-18 16:20:14 +00002672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 /* success */
2674 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002675 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002682 }
2683 return ret;
2684}
2685
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002686static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002713 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 NEXTL(l);
2715 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Daniel Veillard96688262005-08-23 18:14:12 +00002717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002734 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002736 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * is updated to the current location in the string.
2738 */
2739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002740static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002748 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002749 (c != ':')) {
2750 return(NULL);
2751 }
2752
William M. Brack871611b2003-10-18 04:53:14 +00002753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002776 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002785 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 return(NULL);
2790 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002791 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
William M. Brack871611b2003-10-18 04:53:14 +00002829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 xmlChar *tmp;
2866
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002869 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 return(NULL);
2874 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002875 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002922 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
William M. Brack871611b2003-10-18 04:53:14 +00002944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 xmlChar *tmp;
2948
Owen Taylor3473f882001-02-23 17:55:21 +00002949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002990 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002991 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 }
2997 if (name != NULL)
2998 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002999 if (*cur == 0)
3000 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003033 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003034 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003038 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003042static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 int len = 0;
3047 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003071
3072 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003073 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003074 */
3075 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 while ((NXT(0) != limit) && /* checked */
3077 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003079 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003207 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003213 } else
3214 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003215 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003217
3218mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003219 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003221}
3222
3223/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003224 * xmlParseAttValue:
3225 * @ctxt: an XML parser context
3226 *
3227 * parse a value for an attribute
3228 * Note: the parser won't do substitution of entities here, this
3229 * will be handled later in xmlStringGetNodeList
3230 *
3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232 * "'" ([^<&'] | Reference)* "'"
3233 *
3234 * 3.3.3 Attribute-Value Normalization:
3235 * Before the value of an attribute is passed to the application or
3236 * checked for validity, the XML processor must normalize it as follows:
3237 * - a character reference is processed by appending the referenced
3238 * character to the attribute value
3239 * - an entity reference is processed by recursively processing the
3240 * replacement text of the entity
3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242 * appending #x20 to the normalized value, except that only a single
3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3244 * parsed entity or the literal entity value of an internal parsed entity
3245 * - other characters are processed by appending them to the normalized value
3246 * If the declared value is not CDATA, then the XML processor must further
3247 * process the normalized attribute value by discarding any leading and
3248 * trailing space (#x20) characters, and by replacing sequences of space
3249 * (#x20) characters by a single space (#x20) character.
3250 * All attributes for which no declaration has been read should be treated
3251 * by a non-validating parser as if declared CDATA.
3252 *
3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3254 */
3255
3256
3257xmlChar *
3258xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003261}
3262
3263/**
Owen Taylor3473f882001-02-23 17:55:21 +00003264 * xmlParseSystemLiteral:
3265 * @ctxt: an XML parser context
3266 *
3267 * parse an XML Literal
3268 *
3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3270 *
3271 * Returns the SystemLiteral parsed or NULL
3272 */
3273
3274xmlChar *
3275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276 xmlChar *buf = NULL;
3277 int len = 0;
3278 int size = XML_PARSER_BUFFER_SIZE;
3279 int cur, l;
3280 xmlChar stop;
3281 int state = ctxt->instate;
3282 int count = 0;
3283
3284 SHRINK;
3285 if (RAW == '"') {
3286 NEXT;
3287 stop = '"';
3288 } else if (RAW == '\'') {
3289 NEXT;
3290 stop = '\'';
3291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003293 return(NULL);
3294 }
3295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003297 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003299 return(NULL);
3300 }
3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3302 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003305 xmlChar *tmp;
3306
Owen Taylor3473f882001-02-23 17:55:21 +00003307 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3309 if (tmp == NULL) {
3310 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003312 ctxt->instate = (xmlParserInputState) state;
3313 return(NULL);
3314 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003315 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003316 }
3317 count++;
3318 if (count > 50) {
3319 GROW;
3320 count = 0;
3321 }
3322 COPY_BUF(l,buf,len,cur);
3323 NEXTL(l);
3324 cur = CUR_CHAR(l);
3325 if (cur == 0) {
3326 GROW;
3327 SHRINK;
3328 cur = CUR_CHAR(l);
3329 }
3330 }
3331 buf[len] = 0;
3332 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003333 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 } else {
3336 NEXT;
3337 }
3338 return(buf);
3339}
3340
3341/**
3342 * xmlParsePubidLiteral:
3343 * @ctxt: an XML parser context
3344 *
3345 * parse an XML public literal
3346 *
3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3348 *
3349 * Returns the PubidLiteral parsed or NULL.
3350 */
3351
3352xmlChar *
3353xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354 xmlChar *buf = NULL;
3355 int len = 0;
3356 int size = XML_PARSER_BUFFER_SIZE;
3357 xmlChar cur;
3358 xmlChar stop;
3359 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003360 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003361
3362 SHRINK;
3363 if (RAW == '"') {
3364 NEXT;
3365 stop = '"';
3366 } else if (RAW == '\'') {
3367 NEXT;
3368 stop = '\'';
3369 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003371 return(NULL);
3372 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003374 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 xmlChar *tmp;
3383
Owen Taylor3473f882001-02-23 17:55:21 +00003384 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3386 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003388 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003389 return(NULL);
3390 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003391 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 }
3393 buf[len++] = cur;
3394 count++;
3395 if (count > 50) {
3396 GROW;
3397 count = 0;
3398 }
3399 NEXT;
3400 cur = CUR;
3401 if (cur == 0) {
3402 GROW;
3403 SHRINK;
3404 cur = CUR;
3405 }
3406 }
3407 buf[len] = 0;
3408 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 } else {
3411 NEXT;
3412 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003413 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003414 return(buf);
3415}
3416
Daniel Veillard48b2f892001-02-25 16:11:03 +00003417void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003418
/*
 * Lookup table driving the inner loop of the fast character data scanner.
 * A non-zero entry means the byte can be skipped over directly; a zero
 * entry stops the scan. The stopping bytes are the control characters
 * other than TAB (so LF and CR stop the scan), '&', '<', ']' and every
 * byte above 0x7F.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3456
Owen Taylor3473f882001-02-23 17:55:21 +00003457/**
3458 * xmlParseCharData:
3459 * @ctxt: an XML parser context
3460 * @cdata: int indicating whether we are within a CDATA section
3461 *
3462 * parse a CharData section.
3463 * if we are within a CDATA section ']]>' marks an end of section.
3464 *
3465 * The right angle bracket (>) may be represented using the string "&gt;",
3466 * and must, for compatibility, be escaped using "&gt;" or a character
3467 * reference when it appears in the string "]]>" in content, when that
3468 * string is not marking the end of a CDATA section.
3469 *
3470 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3471 */
3472
3473void
3474xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003475 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003477 int line = ctxt->input->line;
3478 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003479 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003480
3481 SHRINK;
3482 GROW;
3483 /*
3484 * Accelerated common case where input don't need to be
3485 * modified before passing it to the handler.
3486 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003487 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003488 in = ctxt->input->cur;
3489 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003490get_more_space:
3491 while (*in == 0x20) in++;
3492 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003493 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003494 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003495 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003496 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003497 goto get_more_space;
3498 }
3499 if (*in == '<') {
3500 nbchar = in - ctxt->input->cur;
3501 if (nbchar > 0) {
3502 const xmlChar *tmp = ctxt->input->cur;
3503 ctxt->input->cur = in;
3504
Daniel Veillard34099b42004-11-04 17:34:35 +00003505 if ((ctxt->sax != NULL) &&
3506 (ctxt->sax->ignorableWhitespace !=
3507 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003508 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003509 if (ctxt->sax->ignorableWhitespace != NULL)
3510 ctxt->sax->ignorableWhitespace(ctxt->userData,
3511 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003512 } else {
3513 if (ctxt->sax->characters != NULL)
3514 ctxt->sax->characters(ctxt->userData,
3515 tmp, nbchar);
3516 if (*ctxt->space == -1)
3517 *ctxt->space = -2;
3518 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 } else if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003521 ctxt->sax->characters(ctxt->userData,
3522 tmp, nbchar);
3523 }
3524 }
3525 return;
3526 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003527
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003528get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003529 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003530 while (test_char_data[*in]) {
3531 in++;
3532 ccol++;
3533 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003534 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003535 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003536 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003537 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003538 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003539 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003540 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003541 }
3542 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003543 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003544 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003545 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003546 return;
3547 }
3548 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003549 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003550 goto get_more;
3551 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003552 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003553 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003554 if ((ctxt->sax != NULL) &&
3555 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003556 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003557 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003558 const xmlChar *tmp = ctxt->input->cur;
3559 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003560
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003561 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003562 if (ctxt->sax->ignorableWhitespace != NULL)
3563 ctxt->sax->ignorableWhitespace(ctxt->userData,
3564 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003565 } else {
3566 if (ctxt->sax->characters != NULL)
3567 ctxt->sax->characters(ctxt->userData,
3568 tmp, nbchar);
3569 if (*ctxt->space == -1)
3570 *ctxt->space = -2;
3571 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003572 line = ctxt->input->line;
3573 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003574 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003575 if (ctxt->sax->characters != NULL)
3576 ctxt->sax->characters(ctxt->userData,
3577 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003578 line = ctxt->input->line;
3579 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003580 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003581 }
3582 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003583 if (*in == 0xD) {
3584 in++;
3585 if (*in == 0xA) {
3586 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003587 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003588 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003589 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003590 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003591 in--;
3592 }
3593 if (*in == '<') {
3594 return;
3595 }
3596 if (*in == '&') {
3597 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003598 }
3599 SHRINK;
3600 GROW;
3601 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003603 nbchar = 0;
3604 }
Daniel Veillard50582112001-03-26 22:52:16 +00003605 ctxt->input->line = line;
3606 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003607 xmlParseCharDataComplex(ctxt, cdata);
3608}
3609
Daniel Veillard01c13b52002-12-10 15:19:08 +00003610/**
3611 * xmlParseCharDataComplex:
3612 * @ctxt: an XML parser context
3613 * @cdata: int indicating whether we are within a CDATA section
3614 *
3615 * parse a CharData section.this is the fallback function
3616 * of xmlParseCharData() when the parsing requires handling
3617 * of non-ASCII characters.
3618 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003619void
3620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3622 int nbchar = 0;
3623 int cur, l;
3624 int count = 0;
3625
3626 SHRINK;
3627 GROW;
3628 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003629 while ((cur != '<') && /* checked */
3630 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if ((cur == ']') && (NXT(1) == ']') &&
3633 (NXT(2) == '>')) {
3634 if (cdata) break;
3635 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003637 }
3638 }
3639 COPY_BUF(l,buf,nbchar,cur);
3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003641 buf[nbchar] = 0;
3642
Owen Taylor3473f882001-02-23 17:55:21 +00003643 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003644 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003645 */
3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003647 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (ctxt->sax->ignorableWhitespace != NULL)
3649 ctxt->sax->ignorableWhitespace(ctxt->userData,
3650 buf, nbchar);
3651 } else {
3652 if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003654 if ((ctxt->sax->characters !=
3655 ctxt->sax->ignorableWhitespace) &&
3656 (*ctxt->space == -1))
3657 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 }
3660 nbchar = 0;
3661 }
3662 count++;
3663 if (count > 50) {
3664 GROW;
3665 count = 0;
3666 }
3667 NEXTL(l);
3668 cur = CUR_CHAR(l);
3669 }
3670 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003671 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003673 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003674 */
3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003676 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003677 if (ctxt->sax->ignorableWhitespace != NULL)
3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683 (*ctxt->space == -1))
3684 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 }
3687 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003688 if ((cur != 0) && (!IS_CHAR(cur))) {
3689 /* Generate the error and skip the offending character */
3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691 "PCDATA invalid Char value %d\n",
3692 cur);
3693 NEXTL(l);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695}
3696
3697/**
3698 * xmlParseExternalID:
3699 * @ctxt: an XML parser context
3700 * @publicID: a xmlChar** receiving PubidLiteral
3701 * @strict: indicate whether we should restrict parsing to only
3702 * production [75], see NOTE below
3703 *
3704 * Parse an External ID or a Public ID
3705 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003706 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003707 * 'PUBLIC' S PubidLiteral S SystemLiteral
3708 *
3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3711 *
3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3713 *
3714 * Returns the function returns SystemLiteral and in the second
3715 * case publicID receives PubidLiteral, is strict is off
3716 * it is possible to return NULL and have publicID set.
3717 */
3718
3719xmlChar *
3720xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721 xmlChar *URI = NULL;
3722
3723 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003724
3725 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003727 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 SKIP_BLANKS;
3733 URI = xmlParseSystemLiteral(ctxt);
3734 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003741 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 *publicID = xmlParsePubidLiteral(ctxt);
3745 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 if (strict) {
3749 /*
3750 * We don't handle [83] so "S SystemLiteral" is required.
3751 */
William M. Brack76e95df2003-10-18 16:20:14 +00003752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003754 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 } else {
3757 /*
3758 * We handle [83] so we return immediately, if
3759 * "S SystemLiteral" is not detected. From a purely parsing
3760 * point of view that's a nice mess.
3761 */
3762 const xmlChar *ptr;
3763 GROW;
3764
3765 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767
William M. Brack76e95df2003-10-18 16:20:14 +00003768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3770 }
3771 SKIP_BLANKS;
3772 URI = xmlParseSystemLiteral(ctxt);
3773 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 }
3777 return(URI);
3778}
3779
3780/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003782 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003783 * @buf: the already parsed part of the buffer
3784 * @len: number of bytes filles in the buffer
3785 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003786 *
3787 * Skip an XML (SGML) comment <!-- .... -->
3788 * The spec says that "For compatibility, the string "--" (double-hyphen)
3789 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003790 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003791 *
3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3793 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003794static void
3795xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 int q, ql;
3797 int r, rl;
3798 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 xmlParserInputPtr input = ctxt->input;
3800 int count = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003803 len = 0;
3804 size = XML_PARSER_BUFFER_SIZE;
3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3806 if (buf == NULL) {
3807 xmlErrMemory(ctxt, NULL);
3808 return;
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
3811 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003812 if (q == 0)
3813 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003814 NEXTL(ql);
3815 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003816 if (r == 0)
3817 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003818 NEXTL(rl);
3819 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003820 if (cur == 0)
3821 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003822 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003823 ((cur != '>') ||
3824 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003825 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003826 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003827 }
3828 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003829 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003830 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003831 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3832 if (new_buf == NULL) {
3833 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003834 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003835 return;
3836 }
William M. Bracka3215c72004-07-31 16:24:01 +00003837 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 }
3839 COPY_BUF(ql,buf,len,q);
3840 q = r;
3841 ql = rl;
3842 r = cur;
3843 rl = l;
3844
3845 count++;
3846 if (count > 50) {
3847 GROW;
3848 count = 0;
3849 }
3850 NEXTL(l);
3851 cur = CUR_CHAR(l);
3852 if (cur == 0) {
3853 SHRINK;
3854 GROW;
3855 cur = CUR_CHAR(l);
3856 }
3857 }
3858 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003859 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003860 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003861 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003862 xmlFree(buf);
3863 } else {
3864 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3866 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003867 }
3868 NEXT;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3870 (!ctxt->disableSAX))
3871 ctxt->sax->comment(ctxt->userData, buf);
3872 xmlFree(buf);
3873 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003874 return;
3875not_terminated:
3876 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3877 "Comment not terminated\n", NULL);
3878 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003879}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003880/**
3881 * xmlParseComment:
3882 * @ctxt: an XML parser context
3883 *
3884 * Skip an XML (SGML) comment <!-- .... -->
3885 * The spec says that "For compatibility, the string "--" (double-hyphen)
3886 * must not occur within comments. "
3887 *
3888 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3889 */
3890void
3891xmlParseComment(xmlParserCtxtPtr ctxt) {
3892 xmlChar *buf = NULL;
3893 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003894 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003895 xmlParserInputState state;
3896 const xmlChar *in;
3897 int nbchar = 0, ccol;
3898
3899 /*
3900 * Check that there is a comment right here.
3901 */
3902 if ((RAW != '<') || (NXT(1) != '!') ||
3903 (NXT(2) != '-') || (NXT(3) != '-')) return;
3904
3905 state = ctxt->instate;
3906 ctxt->instate = XML_PARSER_COMMENT;
3907 SKIP(4);
3908 SHRINK;
3909 GROW;
3910
3911 /*
3912 * Accelerated common case where input don't need to be
3913 * modified before passing it to the handler.
3914 */
3915 in = ctxt->input->cur;
3916 do {
3917 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003918 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003919 ctxt->input->line++; ctxt->input->col = 1;
3920 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003921 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003922 }
3923get_more:
3924 ccol = ctxt->input->col;
3925 while (((*in > '-') && (*in <= 0x7F)) ||
3926 ((*in >= 0x20) && (*in < '-')) ||
3927 (*in == 0x09)) {
3928 in++;
3929 ccol++;
3930 }
3931 ctxt->input->col = ccol;
3932 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003933 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003934 ctxt->input->line++; ctxt->input->col = 1;
3935 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003936 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003937 goto get_more;
3938 }
3939 nbchar = in - ctxt->input->cur;
3940 /*
3941 * save current set of data
3942 */
3943 if (nbchar > 0) {
3944 if ((ctxt->sax != NULL) &&
3945 (ctxt->sax->comment != NULL)) {
3946 if (buf == NULL) {
3947 if ((*in == '-') && (in[1] == '-'))
3948 size = nbchar + 1;
3949 else
3950 size = XML_PARSER_BUFFER_SIZE + nbchar;
3951 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3952 if (buf == NULL) {
3953 xmlErrMemory(ctxt, NULL);
3954 ctxt->instate = state;
3955 return;
3956 }
3957 len = 0;
3958 } else if (len + nbchar + 1 >= size) {
3959 xmlChar *new_buf;
3960 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3961 new_buf = (xmlChar *) xmlRealloc(buf,
3962 size * sizeof(xmlChar));
3963 if (new_buf == NULL) {
3964 xmlFree (buf);
3965 xmlErrMemory(ctxt, NULL);
3966 ctxt->instate = state;
3967 return;
3968 }
3969 buf = new_buf;
3970 }
3971 memcpy(&buf[len], ctxt->input->cur, nbchar);
3972 len += nbchar;
3973 buf[len] = 0;
3974 }
3975 }
3976 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003977 if (*in == 0xA) {
3978 in++;
3979 ctxt->input->line++; ctxt->input->col = 1;
3980 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003981 if (*in == 0xD) {
3982 in++;
3983 if (*in == 0xA) {
3984 ctxt->input->cur = in;
3985 in++;
3986 ctxt->input->line++; ctxt->input->col = 1;
3987 continue; /* while */
3988 }
3989 in--;
3990 }
3991 SHRINK;
3992 GROW;
3993 in = ctxt->input->cur;
3994 if (*in == '-') {
3995 if (in[1] == '-') {
3996 if (in[2] == '>') {
3997 SKIP(3);
3998 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3999 (!ctxt->disableSAX)) {
4000 if (buf != NULL)
4001 ctxt->sax->comment(ctxt->userData, buf);
4002 else
4003 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4004 }
4005 if (buf != NULL)
4006 xmlFree(buf);
4007 ctxt->instate = state;
4008 return;
4009 }
4010 if (buf != NULL)
4011 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4012 "Comment not terminated \n<!--%.50s\n",
4013 buf);
4014 else
4015 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4016 "Comment not terminated \n", NULL);
4017 in++;
4018 ctxt->input->col++;
4019 }
4020 in++;
4021 ctxt->input->col++;
4022 goto get_more;
4023 }
4024 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4025 xmlParseCommentComplex(ctxt, buf, len, size);
4026 ctxt->instate = state;
4027 return;
4028}
4029
Owen Taylor3473f882001-02-23 17:55:21 +00004030
4031/**
4032 * xmlParsePITarget:
4033 * @ctxt: an XML parser context
4034 *
4035 * parse the name of a PI
4036 *
4037 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4038 *
4039 * Returns the PITarget name or NULL
4040 */
4041
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004042const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004043xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004044 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004045
4046 name = xmlParseName(ctxt);
4047 if ((name != NULL) &&
4048 ((name[0] == 'x') || (name[0] == 'X')) &&
4049 ((name[1] == 'm') || (name[1] == 'M')) &&
4050 ((name[2] == 'l') || (name[2] == 'L'))) {
4051 int i;
4052 if ((name[0] == 'x') && (name[1] == 'm') &&
4053 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004054 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004055 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004056 return(name);
4057 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004058 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004059 return(name);
4060 }
4061 for (i = 0;;i++) {
4062 if (xmlW3CPIs[i] == NULL) break;
4063 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4064 return(name);
4065 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004066 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4067 "xmlParsePITarget: invalid name prefix 'xml'\n",
4068 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
4070 return(name);
4071}
4072
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: catalog S? '=' S? quoted-URL S?
 * Syntax errors raise an XML_WAR_CATALOG_PI warning, except for a
 * missing '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* keyword */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* equal sign */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;

    /* quoted value, delimited by matching ' or " */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* nothing but blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4134
Owen Taylor3473f882001-02-23 17:55:21 +00004135/**
4136 * xmlParsePI:
4137 * @ctxt: an XML parser context
4138 *
4139 * parse an XML Processing Instruction.
4140 *
4141 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4142 *
4143 * The processing is transfered to SAX once parsed.
4144 */
4145
4146void
4147xmlParsePI(xmlParserCtxtPtr ctxt) {
4148 xmlChar *buf = NULL;
4149 int len = 0;
4150 int size = XML_PARSER_BUFFER_SIZE;
4151 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004152 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004153 xmlParserInputState state;
4154 int count = 0;
4155
4156 if ((RAW == '<') && (NXT(1) == '?')) {
4157 xmlParserInputPtr input = ctxt->input;
4158 state = ctxt->instate;
4159 ctxt->instate = XML_PARSER_PI;
4160 /*
4161 * this is a Processing Instruction.
4162 */
4163 SKIP(2);
4164 SHRINK;
4165
4166 /*
4167 * Parse the target name and check for special support like
4168 * namespace.
4169 */
4170 target = xmlParsePITarget(ctxt);
4171 if (target != NULL) {
4172 if ((RAW == '?') && (NXT(1) == '>')) {
4173 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4175 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004176 }
4177 SKIP(2);
4178
4179 /*
4180 * SAX: PI detected.
4181 */
4182 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4183 (ctxt->sax->processingInstruction != NULL))
4184 ctxt->sax->processingInstruction(ctxt->userData,
4185 target, NULL);
4186 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004187 return;
4188 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004189 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004190 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004191 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004192 ctxt->instate = state;
4193 return;
4194 }
4195 cur = CUR;
4196 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004197 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4198 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
4200 SKIP_BLANKS;
4201 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004202 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004203 ((cur != '?') || (NXT(1) != '>'))) {
4204 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004205 xmlChar *tmp;
4206
Owen Taylor3473f882001-02-23 17:55:21 +00004207 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004208 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4209 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004210 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004211 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 ctxt->instate = state;
4213 return;
4214 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004215 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 count++;
4218 if (count > 50) {
4219 GROW;
4220 count = 0;
4221 }
4222 COPY_BUF(l,buf,len,cur);
4223 NEXTL(l);
4224 cur = CUR_CHAR(l);
4225 if (cur == 0) {
4226 SHRINK;
4227 GROW;
4228 cur = CUR_CHAR(l);
4229 }
4230 }
4231 buf[len] = 0;
4232 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004233 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4234 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004235 } else {
4236 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4238 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004239 }
4240 SKIP(2);
4241
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004242#ifdef LIBXML_CATALOG_ENABLED
4243 if (((state == XML_PARSER_MISC) ||
4244 (state == XML_PARSER_START)) &&
4245 (xmlStrEqual(target, XML_CATALOG_PI))) {
4246 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4247 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4248 (allow == XML_CATA_ALLOW_ALL))
4249 xmlParseCatalogPI(ctxt, buf);
4250 }
4251#endif
4252
4253
Owen Taylor3473f882001-02-23 17:55:21 +00004254 /*
4255 * SAX: PI detected.
4256 */
4257 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->processingInstruction != NULL))
4259 ctxt->sax->processingInstruction(ctxt->userData,
4260 target, buf);
4261 }
4262 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004265 }
4266 ctxt->instate = state;
4267 }
4268}
4269
4270/**
4271 * xmlParseNotationDecl:
4272 * @ctxt: an XML parser context
4273 *
4274 * parse a notation declaration
4275 *
4276 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4277 *
4278 * Hence there is actually 3 choices:
4279 * 'PUBLIC' S PubidLiteral
4280 * 'PUBLIC' S PubidLiteral S SystemLiteral
4281 * and 'SYSTEM' S SystemLiteral
4282 *
4283 * See the NOTE on xmlParseExternalID().
4284 */
4285
4286void
4287xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004288 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004289 xmlChar *Pubid;
4290 xmlChar *Systemid;
4291
Daniel Veillarda07050d2003-10-19 14:46:32 +00004292 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004293 xmlParserInputPtr input = ctxt->input;
4294 SHRINK;
4295 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004296 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4298 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004299 return;
4300 }
4301 SKIP_BLANKS;
4302
Daniel Veillard76d66f42001-05-16 21:05:17 +00004303 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004304 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004305 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 return;
4307 }
William M. Brack76e95df2003-10-18 16:20:14 +00004308 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004310 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004311 return;
4312 }
4313 SKIP_BLANKS;
4314
4315 /*
4316 * Parse the IDs.
4317 */
4318 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4319 SKIP_BLANKS;
4320
4321 if (RAW == '>') {
4322 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4324 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326 NEXT;
4327 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4328 (ctxt->sax->notationDecl != NULL))
4329 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4330 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if (Systemid != NULL) xmlFree(Systemid);
4334 if (Pubid != NULL) xmlFree(Pubid);
4335 }
4336}
4337
4338/**
4339 * xmlParseEntityDecl:
4340 * @ctxt: an XML parser context
4341 *
4342 * parse <!ENTITY declarations
4343 *
4344 * [70] EntityDecl ::= GEDecl | PEDecl
4345 *
4346 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4347 *
4348 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4349 *
4350 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4351 *
4352 * [74] PEDef ::= EntityValue | ExternalID
4353 *
4354 * [76] NDataDecl ::= S 'NDATA' S Name
4355 *
4356 * [ VC: Notation Declared ]
4357 * The Name must match the declared name of a notation.
4358 */
4359
4360void
4361xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004362 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 xmlChar *value = NULL;
4364 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004365 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 int isParameter = 0;
4367 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004368 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004369
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004371 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004372 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 SHRINK;
4374 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004375 skipped = SKIP_BLANKS;
4376 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004377 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4378 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004379 }
Owen Taylor3473f882001-02-23 17:55:21 +00004380
4381 if (RAW == '%') {
4382 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004383 skipped = SKIP_BLANKS;
4384 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4386 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004387 }
Owen Taylor3473f882001-02-23 17:55:21 +00004388 isParameter = 1;
4389 }
4390
Daniel Veillard76d66f42001-05-16 21:05:17 +00004391 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004392 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004393 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4394 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004395 return;
4396 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004397 skipped = SKIP_BLANKS;
4398 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004399 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4400 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004401 }
Owen Taylor3473f882001-02-23 17:55:21 +00004402
Daniel Veillardf5582f12002-06-11 10:08:16 +00004403 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004404 /*
4405 * handle the various case of definitions...
4406 */
4407 if (isParameter) {
4408 if ((RAW == '"') || (RAW == '\'')) {
4409 value = xmlParseEntityValue(ctxt, &orig);
4410 if (value) {
4411 if ((ctxt->sax != NULL) &&
4412 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4413 ctxt->sax->entityDecl(ctxt->userData, name,
4414 XML_INTERNAL_PARAMETER_ENTITY,
4415 NULL, NULL, value);
4416 }
4417 } else {
4418 URI = xmlParseExternalID(ctxt, &literal, 1);
4419 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004420 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004421 }
4422 if (URI) {
4423 xmlURIPtr uri;
4424
4425 uri = xmlParseURI((const char *) URI);
4426 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004427 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4428 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004429 /*
4430 * This really ought to be a well formedness error
4431 * but the XML Core WG decided otherwise c.f. issue
4432 * E26 of the XML erratas.
4433 */
Owen Taylor3473f882001-02-23 17:55:21 +00004434 } else {
4435 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004436 /*
4437 * Okay this is foolish to block those but not
4438 * invalid URIs.
4439 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004440 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004441 } else {
4442 if ((ctxt->sax != NULL) &&
4443 (!ctxt->disableSAX) &&
4444 (ctxt->sax->entityDecl != NULL))
4445 ctxt->sax->entityDecl(ctxt->userData, name,
4446 XML_EXTERNAL_PARAMETER_ENTITY,
4447 literal, URI, NULL);
4448 }
4449 xmlFreeURI(uri);
4450 }
4451 }
4452 }
4453 } else {
4454 if ((RAW == '"') || (RAW == '\'')) {
4455 value = xmlParseEntityValue(ctxt, &orig);
4456 if ((ctxt->sax != NULL) &&
4457 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4458 ctxt->sax->entityDecl(ctxt->userData, name,
4459 XML_INTERNAL_GENERAL_ENTITY,
4460 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004461 /*
4462 * For expat compatibility in SAX mode.
4463 */
4464 if ((ctxt->myDoc == NULL) ||
4465 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4466 if (ctxt->myDoc == NULL) {
4467 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4468 }
4469 if (ctxt->myDoc->intSubset == NULL)
4470 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4471 BAD_CAST "fake", NULL, NULL);
4472
Daniel Veillard1af9a412003-08-20 22:54:39 +00004473 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4474 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004475 }
Owen Taylor3473f882001-02-23 17:55:21 +00004476 } else {
4477 URI = xmlParseExternalID(ctxt, &literal, 1);
4478 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004480 }
4481 if (URI) {
4482 xmlURIPtr uri;
4483
4484 uri = xmlParseURI((const char *)URI);
4485 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4487 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004488 /*
4489 * This really ought to be a well formedness error
4490 * but the XML Core WG decided otherwise c.f. issue
4491 * E26 of the XML erratas.
4492 */
Owen Taylor3473f882001-02-23 17:55:21 +00004493 } else {
4494 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004495 /*
4496 * Okay this is foolish to block those but not
4497 * invalid URIs.
4498 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004500 }
4501 xmlFreeURI(uri);
4502 }
4503 }
William M. Brack76e95df2003-10-18 16:20:14 +00004504 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4506 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004507 }
4508 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004509 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004510 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004511 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4513 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004514 }
4515 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004516 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004517 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4518 (ctxt->sax->unparsedEntityDecl != NULL))
4519 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4520 literal, URI, ndata);
4521 } else {
4522 if ((ctxt->sax != NULL) &&
4523 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4524 ctxt->sax->entityDecl(ctxt->userData, name,
4525 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4526 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004527 /*
4528 * For expat compatibility in SAX mode.
4529 * assuming the entity repalcement was asked for
4530 */
4531 if ((ctxt->replaceEntities != 0) &&
4532 ((ctxt->myDoc == NULL) ||
4533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4534 if (ctxt->myDoc == NULL) {
4535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4536 }
4537
4538 if (ctxt->myDoc->intSubset == NULL)
4539 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4540 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004541 xmlSAX2EntityDecl(ctxt, name,
4542 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4543 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004544 }
Owen Taylor3473f882001-02-23 17:55:21 +00004545 }
4546 }
4547 }
4548 SKIP_BLANKS;
4549 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004551 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 } else {
4553 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004554 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4555 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004556 }
4557 NEXT;
4558 }
4559 if (orig != NULL) {
4560 /*
4561 * Ugly mechanism to save the raw entity value.
4562 */
4563 xmlEntityPtr cur = NULL;
4564
4565 if (isParameter) {
4566 if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->getParameterEntity != NULL))
4568 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4569 } else {
4570 if ((ctxt->sax != NULL) &&
4571 (ctxt->sax->getEntity != NULL))
4572 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004573 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004574 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004575 }
Owen Taylor3473f882001-02-23 17:55:21 +00004576 }
4577 if (cur != NULL) {
4578 if (cur->orig != NULL)
4579 xmlFree(orig);
4580 else
4581 cur->orig = orig;
4582 } else
4583 xmlFree(orig);
4584 }
Owen Taylor3473f882001-02-23 17:55:21 +00004585 if (value != NULL) xmlFree(value);
4586 if (URI != NULL) xmlFree(URI);
4587 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 }
4589}
4590
4591/**
4592 * xmlParseDefaultDecl:
4593 * @ctxt: an XML parser context
4594 * @value: Receive a possible fixed default value for the attribute
4595 *
4596 * Parse an attribute default declaration
4597 *
4598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4599 *
4600 * [ VC: Required Attribute ]
4601 * if the default declaration is the keyword #REQUIRED, then the
4602 * attribute must be specified for all elements of the type in the
4603 * attribute-list declaration.
4604 *
4605 * [ VC: Attribute Default Legal ]
4606 * The declared default value must meet the lexical constraints of
4607 * the declared attribute type c.f. xmlValidateAttributeDecl()
4608 *
4609 * [ VC: Fixed Attribute Default ]
4610 * if an attribute has a default value declared with the #FIXED
4611 * keyword, instances of that attribute must match the default value.
4612 *
4613 * [ WFC: No < in Attribute Values ]
4614 * handled in xmlParseAttValue()
4615 *
4616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4617 * or XML_ATTRIBUTE_FIXED.
4618 */
4619
4620int
4621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4622 int val;
4623 xmlChar *ret;
4624
4625 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004626 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004627 SKIP(9);
4628 return(XML_ATTRIBUTE_REQUIRED);
4629 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004630 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004631 SKIP(8);
4632 return(XML_ATTRIBUTE_IMPLIED);
4633 }
4634 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004635 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004636 SKIP(6);
4637 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004638 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4640 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 SKIP_BLANKS;
4643 }
4644 ret = xmlParseAttValue(ctxt);
4645 ctxt->instate = XML_PARSER_DTD;
4646 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004647 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004648 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 } else
4650 *value = ret;
4651 return(val);
4652}
4653
4654/**
4655 * xmlParseNotationType:
4656 * @ctxt: an XML parser context
4657 *
4658 * parse an Notation attribute type.
4659 *
4660 * Note: the leading 'NOTATION' S part has already being parsed...
4661 *
4662 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4663 *
4664 * [ VC: Notation Attributes ]
4665 * Values of this type must match one of the notation names included
4666 * in the declaration; all notation names in the declaration must be declared.
4667 *
4668 * Returns: the notation attribute tree built while parsing
4669 */
4670
4671xmlEnumerationPtr
4672xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004673 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4675
4676 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004677 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 return(NULL);
4679 }
4680 SHRINK;
4681 do {
4682 NEXT;
4683 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004684 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4687 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(ret);
4689 }
4690 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if (cur == NULL) return(ret);
4692 if (last == NULL) ret = last = cur;
4693 else {
4694 last->next = cur;
4695 last = cur;
4696 }
4697 SKIP_BLANKS;
4698 } while (RAW == '|');
4699 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 if ((last != NULL) && (last != ret))
4702 xmlFreeEnumeration(last);
4703 return(ret);
4704 }
4705 NEXT;
4706 return(ret);
4707}
4708
4709/**
4710 * xmlParseEnumerationType:
4711 * @ctxt: an XML parser context
4712 *
4713 * parse an Enumeration attribute type.
4714 *
4715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4716 *
4717 * [ VC: Enumeration ]
4718 * Values of this type must match one of the Nmtoken tokens in
4719 * the declaration
4720 *
4721 * Returns: the enumeration attribute tree built while parsing
4722 */
4723
4724xmlEnumerationPtr
4725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4726 xmlChar *name;
4727 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4728
4729 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004731 return(NULL);
4732 }
4733 SHRINK;
4734 do {
4735 NEXT;
4736 SKIP_BLANKS;
4737 name = xmlParseNmtoken(ctxt);
4738 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 return(ret);
4741 }
4742 cur = xmlCreateEnumeration(name);
4743 xmlFree(name);
4744 if (cur == NULL) return(ret);
4745 if (last == NULL) ret = last = cur;
4746 else {
4747 last->next = cur;
4748 last = cur;
4749 }
4750 SKIP_BLANKS;
4751 } while (RAW == '|');
4752 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004753 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004754 return(ret);
4755 }
4756 NEXT;
4757 return(ret);
4758}
4759
4760/**
4761 * xmlParseEnumeratedType:
4762 * @ctxt: an XML parser context
4763 * @tree: the enumeration tree built while parsing
4764 *
4765 * parse an Enumerated attribute type.
4766 *
4767 * [57] EnumeratedType ::= NotationType | Enumeration
4768 *
4769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4770 *
4771 *
4772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4773 */
4774
4775int
4776xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004777 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004778 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004779 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4781 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004782 return(0);
4783 }
4784 SKIP_BLANKS;
4785 *tree = xmlParseNotationType(ctxt);
4786 if (*tree == NULL) return(0);
4787 return(XML_ATTRIBUTE_NOTATION);
4788 }
4789 *tree = xmlParseEnumerationType(ctxt);
4790 if (*tree == NULL) return(0);
4791 return(XML_ATTRIBUTE_ENUMERATION);
4792}
4793
4794/**
4795 * xmlParseAttributeType:
4796 * @ctxt: an XML parser context
4797 * @tree: the enumeration tree built while parsing
4798 *
4799 * parse the Attribute list def for an element
4800 *
4801 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4802 *
4803 * [55] StringType ::= 'CDATA'
4804 *
4805 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4806 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4807 *
4808 * Validity constraints for attribute values syntax are checked in
4809 * xmlValidateAttributeValue()
4810 *
4811 * [ VC: ID ]
4812 * Values of type ID must match the Name production. A name must not
4813 * appear more than once in an XML document as a value of this type;
4814 * i.e., ID values must uniquely identify the elements which bear them.
4815 *
4816 * [ VC: One ID per Element Type ]
4817 * No element type may have more than one ID attribute specified.
4818 *
4819 * [ VC: ID Attribute Default ]
4820 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4821 *
4822 * [ VC: IDREF ]
4823 * Values of type IDREF must match the Name production, and values
4824 * of type IDREFS must match Names; each IDREF Name must match the value
4825 * of an ID attribute on some element in the XML document; i.e. IDREF
4826 * values must match the value of some ID attribute.
4827 *
4828 * [ VC: Entity Name ]
4829 * Values of type ENTITY must match the Name production, values
4830 * of type ENTITIES must match Names; each Entity Name must match the
4831 * name of an unparsed entity declared in the DTD.
4832 *
4833 * [ VC: Name Token ]
4834 * Values of type NMTOKEN must match the Nmtoken production; values
4835 * of type NMTOKENS must match Nmtokens.
4836 *
4837 * Returns the attribute type
4838 */
4839int
4840xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4841 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004842 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004843 SKIP(5);
4844 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004845 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004846 SKIP(6);
4847 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004848 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004849 SKIP(5);
4850 return(XML_ATTRIBUTE_IDREF);
4851 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4852 SKIP(2);
4853 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004854 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004855 SKIP(6);
4856 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004857 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004858 SKIP(8);
4859 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004860 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004861 SKIP(8);
4862 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004863 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004864 SKIP(7);
4865 return(XML_ATTRIBUTE_NMTOKEN);
4866 }
4867 return(xmlParseEnumeratedType(ctxt, tree));
4868}
4869
4870/**
4871 * xmlParseAttributeListDecl:
4872 * @ctxt: an XML parser context
4873 *
4874 * : parse the Attribute list def for an element
4875 *
4876 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4877 *
4878 * [53] AttDef ::= S Name S AttType S DefaultDecl
4879 *
4880 */
4881void
4882xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004883 const xmlChar *elemName;
4884 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004885 xmlEnumerationPtr tree;
4886
Daniel Veillarda07050d2003-10-19 14:46:32 +00004887 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004888 xmlParserInputPtr input = ctxt->input;
4889
4890 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004891 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004893 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 }
4895 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004896 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004897 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4899 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004900 return;
4901 }
4902 SKIP_BLANKS;
4903 GROW;
4904 while (RAW != '>') {
4905 const xmlChar *check = CUR_PTR;
4906 int type;
4907 int def;
4908 xmlChar *defaultValue = NULL;
4909
4910 GROW;
4911 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004912 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004913 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4915 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004916 break;
4917 }
4918 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004921 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004922 break;
4923 }
4924 SKIP_BLANKS;
4925
4926 type = xmlParseAttributeType(ctxt, &tree);
4927 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004928 break;
4929 }
4930
4931 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004932 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4934 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004935 if (tree != NULL)
4936 xmlFreeEnumeration(tree);
4937 break;
4938 }
4939 SKIP_BLANKS;
4940
4941 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4942 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if (defaultValue != NULL)
4944 xmlFree(defaultValue);
4945 if (tree != NULL)
4946 xmlFreeEnumeration(tree);
4947 break;
4948 }
4949
4950 GROW;
4951 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004952 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004954 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004955 if (defaultValue != NULL)
4956 xmlFree(defaultValue);
4957 if (tree != NULL)
4958 xmlFreeEnumeration(tree);
4959 break;
4960 }
4961 SKIP_BLANKS;
4962 }
4963 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004964 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4965 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004966 if (defaultValue != NULL)
4967 xmlFree(defaultValue);
4968 if (tree != NULL)
4969 xmlFreeEnumeration(tree);
4970 break;
4971 }
4972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4973 (ctxt->sax->attributeDecl != NULL))
4974 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4975 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004976 else if (tree != NULL)
4977 xmlFreeEnumeration(tree);
4978
4979 if ((ctxt->sax2) && (defaultValue != NULL) &&
4980 (def != XML_ATTRIBUTE_IMPLIED) &&
4981 (def != XML_ATTRIBUTE_REQUIRED)) {
4982 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4983 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004984 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4985 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4986 }
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (defaultValue != NULL)
4988 xmlFree(defaultValue);
4989 GROW;
4990 }
4991 if (RAW == '>') {
4992 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004993 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4994 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004995 }
4996 NEXT;
4997 }
Owen Taylor3473f882001-02-23 17:55:21 +00004998 }
4999}
5000
5001/**
5002 * xmlParseElementMixedContentDecl:
5003 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005004 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005005 *
5006 * parse the declaration for a Mixed Element content
5007 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5008 *
5009 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5010 * '(' S? '#PCDATA' S? ')'
5011 *
5012 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5013 *
5014 * [ VC: No Duplicate Types ]
5015 * The same name must not appear more than once in a single
5016 * mixed-content declaration.
5017 *
5018 * returns: the list of the xmlElementContentPtr describing the element choices
5019 */
5020xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005021xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005023 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005024
5025 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005026 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005027 SKIP(7);
5028 SKIP_BLANKS;
5029 SHRINK;
5030 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005031 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005032 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5033"Element content declaration doesn't start and stop in the same entity\n",
5034 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005035 }
Owen Taylor3473f882001-02-23 17:55:21 +00005036 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005037 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005038 if (RAW == '*') {
5039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5040 NEXT;
5041 }
5042 return(ret);
5043 }
5044 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005045 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005046 if (ret == NULL) return(NULL);
5047 }
5048 while (RAW == '|') {
5049 NEXT;
5050 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005051 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 if (ret == NULL) return(NULL);
5053 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005054 if (cur != NULL)
5055 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005056 cur = ret;
5057 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005058 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005059 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005060 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005061 if (n->c1 != NULL)
5062 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005063 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005064 if (n != NULL)
5065 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005066 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 }
5068 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005069 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005071 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005072 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005073 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005074 return(NULL);
5075 }
5076 SKIP_BLANKS;
5077 GROW;
5078 }
5079 if ((RAW == ')') && (NXT(1) == '*')) {
5080 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005081 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005082 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005083 if (cur->c2 != NULL)
5084 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005085 }
5086 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005088 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5089"Element content declaration doesn't start and stop in the same entity\n",
5090 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 SKIP(2);
5093 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005094 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005095 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005096 return(NULL);
5097 }
5098
5099 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005100 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005101 }
5102 return(ret);
5103}
5104
5105/**
5106 * xmlParseElementChildrenContentDecl:
5107 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005108 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005109 *
5110 * parse the declaration for a Mixed Element content
5111 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5112 *
5113 *
5114 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5115 *
5116 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5117 *
5118 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5119 *
5120 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5121 *
5122 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5123 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005124 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005125 * opening or closing parentheses in a choice, seq, or Mixed
5126 * construct is contained in the replacement text for a parameter
5127 * entity, both must be contained in the same replacement text. For
5128 * interoperability, if a parameter-entity reference appears in a
5129 * choice, seq, or Mixed construct, its replacement text should not
5130 * be empty, and neither the first nor last non-blank character of
5131 * the replacement text should be a connector (| or ,).
5132 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005133 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005134 * hierarchy.
5135 */
5136xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005137xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005138 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005139 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005140 xmlChar type = 0;
5141
5142 SKIP_BLANKS;
5143 GROW;
5144 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005146
Owen Taylor3473f882001-02-23 17:55:21 +00005147 /* Recurse on first child */
5148 NEXT;
5149 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005150 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005151 SKIP_BLANKS;
5152 GROW;
5153 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005156 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 return(NULL);
5158 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005159 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005160 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005161 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005162 return(NULL);
5163 }
Owen Taylor3473f882001-02-23 17:55:21 +00005164 GROW;
5165 if (RAW == '?') {
5166 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5167 NEXT;
5168 } else if (RAW == '*') {
5169 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5170 NEXT;
5171 } else if (RAW == '+') {
5172 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5173 NEXT;
5174 } else {
5175 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5176 }
Owen Taylor3473f882001-02-23 17:55:21 +00005177 GROW;
5178 }
5179 SKIP_BLANKS;
5180 SHRINK;
5181 while (RAW != ')') {
5182 /*
5183 * Each loop we parse one separator and one element.
5184 */
5185 if (RAW == ',') {
5186 if (type == 0) type = CUR;
5187
5188 /*
5189 * Detect "Name | Name , Name" error
5190 */
5191 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005193 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005194 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005195 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005196 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005198 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005199 return(NULL);
5200 }
5201 NEXT;
5202
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005203 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005205 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005206 xmlFreeDocElementContent(ctxt->myDoc, last);
5207 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005208 return(NULL);
5209 }
5210 if (last == NULL) {
5211 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005212 if (ret != NULL)
5213 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005214 ret = cur = op;
5215 } else {
5216 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005217 if (op != NULL)
5218 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005219 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005220 if (last != NULL)
5221 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005222 cur =op;
5223 last = NULL;
5224 }
5225 } else if (RAW == '|') {
5226 if (type == 0) type = CUR;
5227
5228 /*
5229 * Detect "Name , Name | Name" error
5230 */
5231 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005232 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005233 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005234 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005235 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005238 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005239 return(NULL);
5240 }
5241 NEXT;
5242
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005243 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005245 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005246 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005248 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005249 return(NULL);
5250 }
5251 if (last == NULL) {
5252 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005253 if (ret != NULL)
5254 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005255 ret = cur = op;
5256 } else {
5257 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005258 if (op != NULL)
5259 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005260 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005261 if (last != NULL)
5262 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005263 cur =op;
5264 last = NULL;
5265 }
5266 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005267 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005269 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return(NULL);
5271 }
5272 GROW;
5273 SKIP_BLANKS;
5274 GROW;
5275 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005276 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 /* Recurse on second child */
5278 NEXT;
5279 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005280 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005281 SKIP_BLANKS;
5282 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005283 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005285 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005287 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005288 return(NULL);
5289 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005290 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005291 if (RAW == '?') {
5292 last->ocur = XML_ELEMENT_CONTENT_OPT;
5293 NEXT;
5294 } else if (RAW == '*') {
5295 last->ocur = XML_ELEMENT_CONTENT_MULT;
5296 NEXT;
5297 } else if (RAW == '+') {
5298 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5299 NEXT;
5300 } else {
5301 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5302 }
5303 }
5304 SKIP_BLANKS;
5305 GROW;
5306 }
5307 if ((cur != NULL) && (last != NULL)) {
5308 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005309 if (last != NULL)
5310 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005311 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005312 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005313 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5314"Element content declaration doesn't start and stop in the same entity\n",
5315 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005316 }
Owen Taylor3473f882001-02-23 17:55:21 +00005317 NEXT;
5318 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005319 if (ret != NULL) {
5320 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5321 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5322 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5323 else
5324 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5325 }
Owen Taylor3473f882001-02-23 17:55:21 +00005326 NEXT;
5327 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005328 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005329 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005330 cur = ret;
5331 /*
5332 * Some normalization:
5333 * (a | b* | c?)* == (a | b | c)*
5334 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005335 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005336 if ((cur->c1 != NULL) &&
5337 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5338 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5339 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5340 if ((cur->c2 != NULL) &&
5341 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5342 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5343 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5344 cur = cur->c2;
5345 }
5346 }
Owen Taylor3473f882001-02-23 17:55:21 +00005347 NEXT;
5348 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005349 if (ret != NULL) {
5350 int found = 0;
5351
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005352 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005355 else
5356 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005357 /*
5358 * Some normalization:
5359 * (a | b*)+ == (a | b)*
5360 * (a | b?)+ == (a | b)*
5361 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005363 if ((cur->c1 != NULL) &&
5364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5367 found = 1;
5368 }
5369 if ((cur->c2 != NULL) &&
5370 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5371 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5372 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5373 found = 1;
5374 }
5375 cur = cur->c2;
5376 }
5377 if (found)
5378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5379 }
Owen Taylor3473f882001-02-23 17:55:21 +00005380 NEXT;
5381 }
5382 return(ret);
5383}
5384
5385/**
5386 * xmlParseElementContentDecl:
5387 * @ctxt: an XML parser context
5388 * @name: the name of the element being defined.
5389 * @result: the Element Content pointer will be stored here if any
5390 *
5391 * parse the declaration for an Element content either Mixed or Children,
5392 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5393 *
5394 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5395 *
5396 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5397 */
5398
5399int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005400xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005401 xmlElementContentPtr *result) {
5402
5403 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005404 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005405 int res;
5406
5407 *result = NULL;
5408
5409 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005410 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005411 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 return(-1);
5413 }
5414 NEXT;
5415 GROW;
5416 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005417 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005418 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005419 res = XML_ELEMENT_TYPE_MIXED;
5420 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005421 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005422 res = XML_ELEMENT_TYPE_ELEMENT;
5423 }
Owen Taylor3473f882001-02-23 17:55:21 +00005424 SKIP_BLANKS;
5425 *result = tree;
5426 return(res);
5427}
5428
5429/**
5430 * xmlParseElementDecl:
5431 * @ctxt: an XML parser context
5432 *
5433 * parse an Element declaration.
5434 *
5435 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5436 *
5437 * [ VC: Unique Element Type Declaration ]
5438 * No element type may be declared more than once
5439 *
5440 * Returns the type of the element, or -1 in case of error
5441 */
5442int
5443xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005444 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005445 int ret = -1;
5446 xmlElementContentPtr content = NULL;
5447
Daniel Veillard4c778d82005-01-23 17:37:44 +00005448 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005449 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005450 xmlParserInputPtr input = ctxt->input;
5451
5452 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005453 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005456 }
5457 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005458 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5461 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005462 return(-1);
5463 }
5464 while ((RAW == 0) && (ctxt->inputNr > 1))
5465 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005466 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005469 }
5470 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005471 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005472 SKIP(5);
5473 /*
5474 * Element must always be empty.
5475 */
5476 ret = XML_ELEMENT_TYPE_EMPTY;
5477 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5478 (NXT(2) == 'Y')) {
5479 SKIP(3);
5480 /*
5481 * Element is a generic container.
5482 */
5483 ret = XML_ELEMENT_TYPE_ANY;
5484 } else if (RAW == '(') {
5485 ret = xmlParseElementContentDecl(ctxt, name, &content);
5486 } else {
5487 /*
5488 * [ WFC: PEs in Internal Subset ] error handling.
5489 */
5490 if ((RAW == '%') && (ctxt->external == 0) &&
5491 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005492 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005493 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005494 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005495 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005496 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5497 }
Owen Taylor3473f882001-02-23 17:55:21 +00005498 return(-1);
5499 }
5500
5501 SKIP_BLANKS;
5502 /*
5503 * Pop-up of finished entities.
5504 */
5505 while ((RAW == 0) && (ctxt->inputNr > 1))
5506 xmlPopInput(ctxt);
5507 SKIP_BLANKS;
5508
5509 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005510 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005511 if (content != NULL) {
5512 xmlFreeDocElementContent(ctxt->myDoc, content);
5513 }
Owen Taylor3473f882001-02-23 17:55:21 +00005514 } else {
5515 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005516 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5517 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519
5520 NEXT;
5521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005522 (ctxt->sax->elementDecl != NULL)) {
5523 if (content != NULL)
5524 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5526 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005527 if ((content != NULL) && (content->parent == NULL)) {
5528 /*
5529 * this is a trick: if xmlAddElementDecl is called,
5530 * instead of copying the full tree it is plugged directly
5531 * if called from the parser. Avoid duplicating the
5532 * interfaces or change the API/ABI
5533 */
5534 xmlFreeDocElementContent(ctxt->myDoc, content);
5535 }
5536 } else if (content != NULL) {
5537 xmlFreeDocElementContent(ctxt->myDoc, content);
5538 }
Owen Taylor3473f882001-02-23 17:55:21 +00005539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
5541 return(ret);
5542}
5543
5544/**
Owen Taylor3473f882001-02-23 17:55:21 +00005545 * xmlParseConditionalSections
5546 * @ctxt: an XML parser context
5547 *
5548 * [61] conditionalSect ::= includeSect | ignoreSect
5549 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5550 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5551 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5552 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5553 */
5554
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005555static void
Owen Taylor3473f882001-02-23 17:55:21 +00005556xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5557 SKIP(3);
5558 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005559 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005560 SKIP(7);
5561 SKIP_BLANKS;
5562 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 } else {
5565 NEXT;
5566 }
5567 if (xmlParserDebugEntities) {
5568 if ((ctxt->input != NULL) && (ctxt->input->filename))
5569 xmlGenericError(xmlGenericErrorContext,
5570 "%s(%d): ", ctxt->input->filename,
5571 ctxt->input->line);
5572 xmlGenericError(xmlGenericErrorContext,
5573 "Entering INCLUDE Conditional Section\n");
5574 }
5575
5576 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5577 (NXT(2) != '>'))) {
5578 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005579 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005580
5581 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5582 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005583 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005584 NEXT;
5585 } else if (RAW == '%') {
5586 xmlParsePEReference(ctxt);
5587 } else
5588 xmlParseMarkupDecl(ctxt);
5589
5590 /*
5591 * Pop-up of finished entities.
5592 */
5593 while ((RAW == 0) && (ctxt->inputNr > 1))
5594 xmlPopInput(ctxt);
5595
Daniel Veillardfdc91562002-07-01 21:52:03 +00005596 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005597 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005598 break;
5599 }
5600 }
5601 if (xmlParserDebugEntities) {
5602 if ((ctxt->input != NULL) && (ctxt->input->filename))
5603 xmlGenericError(xmlGenericErrorContext,
5604 "%s(%d): ", ctxt->input->filename,
5605 ctxt->input->line);
5606 xmlGenericError(xmlGenericErrorContext,
5607 "Leaving INCLUDE Conditional Section\n");
5608 }
5609
Daniel Veillarda07050d2003-10-19 14:46:32 +00005610 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005611 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005612 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005613 int depth = 0;
5614
5615 SKIP(6);
5616 SKIP_BLANKS;
5617 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005618 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005619 } else {
5620 NEXT;
5621 }
5622 if (xmlParserDebugEntities) {
5623 if ((ctxt->input != NULL) && (ctxt->input->filename))
5624 xmlGenericError(xmlGenericErrorContext,
5625 "%s(%d): ", ctxt->input->filename,
5626 ctxt->input->line);
5627 xmlGenericError(xmlGenericErrorContext,
5628 "Entering IGNORE Conditional Section\n");
5629 }
5630
5631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005632 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005633 * But disable SAX event generating DTD building in the meantime
5634 */
5635 state = ctxt->disableSAX;
5636 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005637 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005638 ctxt->instate = XML_PARSER_IGNORE;
5639
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005640 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5642 depth++;
5643 SKIP(3);
5644 continue;
5645 }
5646 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5647 if (--depth >= 0) SKIP(3);
5648 continue;
5649 }
5650 NEXT;
5651 continue;
5652 }
5653
5654 ctxt->disableSAX = state;
5655 ctxt->instate = instate;
5656
5657 if (xmlParserDebugEntities) {
5658 if ((ctxt->input != NULL) && (ctxt->input->filename))
5659 xmlGenericError(xmlGenericErrorContext,
5660 "%s(%d): ", ctxt->input->filename,
5661 ctxt->input->line);
5662 xmlGenericError(xmlGenericErrorContext,
5663 "Leaving IGNORE Conditional Section\n");
5664 }
5665
5666 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005667 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669
5670 if (RAW == 0)
5671 SHRINK;
5672
5673 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005674 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005675 } else {
5676 SKIP(3);
5677 }
5678}
5679
5680/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005681 * xmlParseMarkupDecl:
5682 * @ctxt: an XML parser context
5683 *
5684 * parse Markup declarations
5685 *
5686 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5687 * NotationDecl | PI | Comment
5688 *
5689 * [ VC: Proper Declaration/PE Nesting ]
5690 * Parameter-entity replacement text must be properly nested with
5691 * markup declarations. That is to say, if either the first character
5692 * or the last character of a markup declaration (markupdecl above) is
5693 * contained in the replacement text for a parameter-entity reference,
5694 * both must be contained in the same replacement text.
5695 *
5696 * [ WFC: PEs in Internal Subset ]
5697 * In the internal DTD subset, parameter-entity references can occur
5698 * only where markup declarations can occur, not within markup declarations.
5699 * (This does not apply to references that occur in external parameter
5700 * entities or to the external subset.)
5701 */
5702void
5703xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5704 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005705 if (CUR == '<') {
5706 if (NXT(1) == '!') {
5707 switch (NXT(2)) {
5708 case 'E':
5709 if (NXT(3) == 'L')
5710 xmlParseElementDecl(ctxt);
5711 else if (NXT(3) == 'N')
5712 xmlParseEntityDecl(ctxt);
5713 break;
5714 case 'A':
5715 xmlParseAttributeListDecl(ctxt);
5716 break;
5717 case 'N':
5718 xmlParseNotationDecl(ctxt);
5719 break;
5720 case '-':
5721 xmlParseComment(ctxt);
5722 break;
5723 default:
5724 /* there is an error but it will be detected later */
5725 break;
5726 }
5727 } else if (NXT(1) == '?') {
5728 xmlParsePI(ctxt);
5729 }
5730 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005731 /*
5732 * This is only for internal subset. On external entities,
5733 * the replacement is done before parsing stage
5734 */
5735 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5736 xmlParsePEReference(ctxt);
5737
5738 /*
5739 * Conditional sections are allowed from entities included
5740 * by PE References in the internal subset.
5741 */
5742 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5744 xmlParseConditionalSections(ctxt);
5745 }
5746 }
5747
5748 ctxt->instate = XML_PARSER_DTD;
5749}
5750
5751/**
5752 * xmlParseTextDecl:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse an XML declaration header for external entities
5756 *
5757 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5758 *
5759 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5760 */
5761
5762void
5763xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5764 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005765 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005766
5767 /*
5768 * We know that '<?xml' is here.
5769 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005770 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005771 SKIP(5);
5772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005774 return;
5775 }
5776
William M. Brack76e95df2003-10-18 16:20:14 +00005777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005780 }
5781 SKIP_BLANKS;
5782
5783 /*
5784 * We may have the VersionInfo here.
5785 */
5786 version = xmlParseVersionInfo(ctxt);
5787 if (version == NULL)
5788 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005789 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005790 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005793 }
5794 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005795 ctxt->input->version = version;
5796
5797 /*
5798 * We must have the encoding declaration
5799 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005800 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005801 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5802 /*
5803 * The XML REC instructs us to stop parsing right here
5804 */
5805 return;
5806 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005807 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5808 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5809 "Missing encoding in text declaration\n");
5810 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005811
5812 SKIP_BLANKS;
5813 if ((RAW == '?') && (NXT(1) == '>')) {
5814 SKIP(2);
5815 } else if (RAW == '>') {
5816 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005817 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005818 NEXT;
5819 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005820 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005821 MOVETO_ENDTAG(CUR_PTR);
5822 NEXT;
5823 }
5824}
5825
5826/**
Owen Taylor3473f882001-02-23 17:55:21 +00005827 * xmlParseExternalSubset:
5828 * @ctxt: an XML parser context
5829 * @ExternalID: the external identifier
5830 * @SystemID: the system identifier (or URL)
5831 *
5832 * parse Markup declarations from an external subset
5833 *
5834 * [30] extSubset ::= textDecl? extSubsetDecl
5835 *
5836 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5837 */
5838void
5839xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5840 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005841 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005842 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005843 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005844 xmlParseTextDecl(ctxt);
5845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5846 /*
5847 * The XML REC instructs us to stop parsing right here
5848 */
5849 ctxt->instate = XML_PARSER_EOF;
5850 return;
5851 }
5852 }
5853 if (ctxt->myDoc == NULL) {
5854 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5855 }
5856 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5857 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5858
5859 ctxt->instate = XML_PARSER_DTD;
5860 ctxt->external = 1;
5861 while (((RAW == '<') && (NXT(1) == '?')) ||
5862 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005863 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005864 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005865 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005866
5867 GROW;
5868 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5869 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005870 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005871 NEXT;
5872 } else if (RAW == '%') {
5873 xmlParsePEReference(ctxt);
5874 } else
5875 xmlParseMarkupDecl(ctxt);
5876
5877 /*
5878 * Pop-up of finished entities.
5879 */
5880 while ((RAW == 0) && (ctxt->inputNr > 1))
5881 xmlPopInput(ctxt);
5882
Daniel Veillardfdc91562002-07-01 21:52:03 +00005883 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005884 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 break;
5886 }
5887 }
5888
5889 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005890 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 }
5892
5893}
5894
5895/**
5896 * xmlParseReference:
5897 * @ctxt: an XML parser context
5898 *
5899 * parse and handle entity references in content, depending on the SAX
5900 * interface, this may end-up in a call to character() if this is a
5901 * CharRef, a predefined entity, if there is no reference() callback.
5902 * or if the parser was asked to switch to that mode.
5903 *
5904 * [67] Reference ::= EntityRef | CharRef
5905 */
5906void
5907xmlParseReference(xmlParserCtxtPtr ctxt) {
5908 xmlEntityPtr ent;
5909 xmlChar *val;
5910 if (RAW != '&') return;
5911
5912 if (NXT(1) == '#') {
5913 int i = 0;
5914 xmlChar out[10];
5915 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005916 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005917
5918 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5919 /*
5920 * So we are using non-UTF-8 buffers
5921 * Check that the char fit on 8bits, if not
5922 * generate a CharRef.
5923 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005924 if (value <= 0xFF) {
5925 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005926 out[1] = 0;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5928 (!ctxt->disableSAX))
5929 ctxt->sax->characters(ctxt->userData, out, 1);
5930 } else {
5931 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005932 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005934 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5936 (!ctxt->disableSAX))
5937 ctxt->sax->reference(ctxt->userData, out);
5938 }
5939 } else {
5940 /*
5941 * Just encode the value in UTF-8
5942 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005943 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005944 out[i] = 0;
5945 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5946 (!ctxt->disableSAX))
5947 ctxt->sax->characters(ctxt->userData, out, i);
5948 }
5949 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005950 int was_checked;
5951
Owen Taylor3473f882001-02-23 17:55:21 +00005952 ent = xmlParseEntityRef(ctxt);
5953 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005954 if (!ctxt->wellFormed)
5955 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005956 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005957 if ((ent->name != NULL) &&
5958 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5959 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005960 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005961
5962
5963 /*
5964 * The first reference to the entity trigger a parsing phase
5965 * where the ent->children is filled with the result from
5966 * the parsing.
5967 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005968 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005969 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005970
Owen Taylor3473f882001-02-23 17:55:21 +00005971 value = ent->content;
5972
5973 /*
5974 * Check that this entity is well formed
5975 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005976 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005977 (value[1] == 0) && (value[0] == '<') &&
5978 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5979 /*
5980 * DONE: get definite answer on this !!!
5981 * Lots of entity decls are used to declare a single
5982 * char
5983 * <!ENTITY lt "<">
5984 * Which seems to be valid since
5985 * 2.4: The ampersand character (&) and the left angle
5986 * bracket (<) may appear in their literal form only
5987 * when used ... They are also legal within the literal
5988 * entity value of an internal entity declaration;i
5989 * see "4.3.2 Well-Formed Parsed Entities".
5990 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5991 * Looking at the OASIS test suite and James Clark
5992 * tests, this is broken. However the XML REC uses
5993 * it. Is the XML REC not well-formed ????
5994 * This is a hack to avoid this problem
5995 *
5996 * ANSWER: since lt gt amp .. are already defined,
5997 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005998 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005999 * is lousy but acceptable.
6000 */
6001 list = xmlNewDocText(ctxt->myDoc, value);
6002 if (list != NULL) {
6003 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6004 (ent->children == NULL)) {
6005 ent->children = list;
6006 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006007 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006008 list->parent = (xmlNodePtr) ent;
6009 } else {
6010 xmlFreeNodeList(list);
6011 }
6012 } else if (list != NULL) {
6013 xmlFreeNodeList(list);
6014 }
6015 } else {
6016 /*
6017 * 4.3.2: An internal general parsed entity is well-formed
6018 * if its replacement text matches the production labeled
6019 * content.
6020 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006021
6022 void *user_data;
6023 /*
6024 * This is a bit hackish but this seems the best
6025 * way to make sure both SAX and DOM entity support
6026 * behaves okay.
6027 */
6028 if (ctxt->userData == ctxt)
6029 user_data = NULL;
6030 else
6031 user_data = ctxt->userData;
6032
Owen Taylor3473f882001-02-23 17:55:21 +00006033 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6034 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006035 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6036 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006037 ctxt->depth--;
6038 } else if (ent->etype ==
6039 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6040 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006041 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006042 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006043 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006044 ctxt->depth--;
6045 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006046 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006047 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6048 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006049 }
6050 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006051 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006052 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006053 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006054 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6055 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006056 (ent->children == NULL)) {
6057 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006058 if (ctxt->replaceEntities) {
6059 /*
6060 * Prune it directly in the generated document
6061 * except for single text nodes.
6062 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006063 if (((list->type == XML_TEXT_NODE) &&
6064 (list->next == NULL)) ||
6065 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006066 list->parent = (xmlNodePtr) ent;
6067 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006068 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006069 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006070 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006071 while (list != NULL) {
6072 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006073 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006074 if (list->next == NULL)
6075 ent->last = list;
6076 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006077 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006078 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006079#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006080 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6081 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006082#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006083 }
6084 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006085 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006086 while (list != NULL) {
6087 list->parent = (xmlNodePtr) ent;
6088 if (list->next == NULL)
6089 ent->last = list;
6090 list = list->next;
6091 }
Owen Taylor3473f882001-02-23 17:55:21 +00006092 }
6093 } else {
6094 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006095 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006096 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006097 } else if ((ret != XML_ERR_OK) &&
6098 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006099 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 } else if (list != NULL) {
6101 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006102 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006105 ent->checked = 1;
6106 }
6107
6108 if (ent->children == NULL) {
6109 /*
6110 * Probably running in SAX mode and the callbacks don't
6111 * build the entity content. So unless we already went
6112 * though parsing for first checking go though the entity
6113 * content to generate callbacks associated to the entity
6114 */
6115 if (was_checked == 1) {
6116 void *user_data;
6117 /*
6118 * This is a bit hackish but this seems the best
6119 * way to make sure both SAX and DOM entity support
6120 * behaves okay.
6121 */
6122 if (ctxt->userData == ctxt)
6123 user_data = NULL;
6124 else
6125 user_data = ctxt->userData;
6126
6127 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6128 ctxt->depth++;
6129 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6130 ent->content, user_data, NULL);
6131 ctxt->depth--;
6132 } else if (ent->etype ==
6133 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6134 ctxt->depth++;
6135 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6136 ctxt->sax, user_data, ctxt->depth,
6137 ent->URI, ent->ExternalID, NULL);
6138 ctxt->depth--;
6139 } else {
6140 ret = XML_ERR_ENTITY_PE_INTERNAL;
6141 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6142 "invalid entity type found\n", NULL);
6143 }
6144 if (ret == XML_ERR_ENTITY_LOOP) {
6145 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6146 return;
6147 }
6148 }
6149 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6150 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6151 /*
6152 * Entity reference callback comes second, it's somewhat
6153 * superfluous but a compatibility to historical behaviour
6154 */
6155 ctxt->sax->reference(ctxt->userData, ent->name);
6156 }
6157 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 }
6159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006160 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006161 /*
6162 * Create a node.
6163 */
6164 ctxt->sax->reference(ctxt->userData, ent->name);
6165 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006166 }
6167 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006168 /*
6169 * There is a problem on the handling of _private for entities
6170 * (bug 155816): Should we copy the content of the field from
6171 * the entity (possibly overwriting some value set by the user
6172 * when a copy is created), should we leave it alone, or should
6173 * we try to take care of different situations? The problem
6174 * is exacerbated by the usage of this field by the xmlReader.
6175 * To fix this bug, we look at _private on the created node
6176 * and, if it's NULL, we copy in whatever was in the entity.
6177 * If it's not NULL we leave it alone. This is somewhat of a
6178 * hack - maybe we should have further tests to determine
6179 * what to do.
6180 */
Owen Taylor3473f882001-02-23 17:55:21 +00006181 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6182 /*
6183 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006184 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006185 * In the first occurrence list contains the replacement.
6186 * progressive == 2 means we are operating on the Reader
6187 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006188 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006189 if (((list == NULL) && (ent->owner == 0)) ||
6190 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006191 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006192
6193 /*
6194 * when operating on a reader, the entities definitions
6195 * are always owning the entities subtree.
6196 if (ctxt->parseMode == XML_PARSE_READER)
6197 ent->owner = 1;
6198 */
6199
Daniel Veillard62f313b2001-07-04 19:49:14 +00006200 cur = ent->children;
6201 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006203 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006204 if (nw->_private == NULL)
6205 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006206 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006207 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006208 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006209 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006210 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006211 if (cur == ent->last) {
6212 /*
6213 * needed to detect some strange empty
6214 * node cases in the reader tests
6215 */
6216 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006217 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006218 (nw->type == XML_ELEMENT_NODE) &&
6219 (nw->children == NULL))
6220 nw->extra = 1;
6221
Daniel Veillard62f313b2001-07-04 19:49:14 +00006222 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006223 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006224 cur = cur->next;
6225 }
Daniel Veillard81273902003-09-30 00:43:48 +00006226#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006227 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006228 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006229#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006230 } else if (list == NULL) {
6231 xmlNodePtr nw = NULL, cur, next, last,
6232 firstChild = NULL;
6233 /*
6234 * Copy the entity child list and make it the new
6235 * entity child list. The goal is to make sure any
6236 * ID or REF referenced will be the one from the
6237 * document content and not the entity copy.
6238 */
6239 cur = ent->children;
6240 ent->children = NULL;
6241 last = ent->last;
6242 ent->last = NULL;
6243 while (cur != NULL) {
6244 next = cur->next;
6245 cur->next = NULL;
6246 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006247 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006248 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006249 if (nw->_private == NULL)
6250 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006251 if (firstChild == NULL){
6252 firstChild = cur;
6253 }
6254 xmlAddChild((xmlNodePtr) ent, nw);
6255 xmlAddChild(ctxt->node, cur);
6256 }
6257 if (cur == last)
6258 break;
6259 cur = next;
6260 }
6261 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006262#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6264 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006265#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006266 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006267 const xmlChar *nbktext;
6268
Daniel Veillard62f313b2001-07-04 19:49:14 +00006269 /*
6270 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006271 * node with a possible previous text one which
6272 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006273 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006274 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6275 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006277 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006278 if ((ent->last != ent->children) &&
6279 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006280 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006281 xmlAddChildList(ctxt->node, ent->children);
6282 }
6283
Owen Taylor3473f882001-02-23 17:55:21 +00006284 /*
6285 * This is to avoid a nasty side effect, see
6286 * characters() in SAX.c
6287 */
6288 ctxt->nodemem = 0;
6289 ctxt->nodelen = 0;
6290 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006291 }
6292 }
6293 } else {
6294 val = ent->content;
6295 if (val == NULL) return;
6296 /*
6297 * inline the entity.
6298 */
6299 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6300 (!ctxt->disableSAX))
6301 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6302 }
6303 }
6304}
6305
6306/**
6307 * xmlParseEntityRef:
6308 * @ctxt: an XML parser context
6309 *
6310 * parse ENTITY references declarations
6311 *
6312 * [68] EntityRef ::= '&' Name ';'
6313 *
6314 * [ WFC: Entity Declared ]
6315 * In a document without any DTD, a document with only an internal DTD
6316 * subset which contains no parameter entity references, or a document
6317 * with "standalone='yes'", the Name given in the entity reference
6318 * must match that in an entity declaration, except that well-formed
6319 * documents need not declare any of the following entities: amp, lt,
6320 * gt, apos, quot. The declaration of a parameter entity must precede
6321 * any reference to it. Similarly, the declaration of a general entity
6322 * must precede any reference to it which appears in a default value in an
6323 * attribute-list declaration. Note that if entities are declared in the
6324 * external subset or in external parameter entities, a non-validating
6325 * processor is not obligated to read and process their declarations;
6326 * for such documents, the rule that an entity must be declared is a
6327 * well-formedness constraint only if standalone='yes'.
6328 *
6329 * [ WFC: Parsed Entity ]
6330 * An entity reference must not contain the name of an unparsed entity
6331 *
6332 * Returns the xmlEntityPtr if found, or NULL otherwise.
6333 */
6334xmlEntityPtr
6335xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 xmlEntityPtr ent = NULL;
6338
6339 GROW;
6340
6341 if (RAW == '&') {
6342 NEXT;
6343 name = xmlParseName(ctxt);
6344 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006345 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6346 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006347 } else {
6348 if (RAW == ';') {
6349 NEXT;
6350 /*
6351 * Ask first SAX for entity resolution, otherwise try the
6352 * predefined set.
6353 */
6354 if (ctxt->sax != NULL) {
6355 if (ctxt->sax->getEntity != NULL)
6356 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006357 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006358 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006359 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6360 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006361 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006362 }
Owen Taylor3473f882001-02-23 17:55:21 +00006363 }
6364 /*
6365 * [ WFC: Entity Declared ]
6366 * In a document without any DTD, a document with only an
6367 * internal DTD subset which contains no parameter entity
6368 * references, or a document with "standalone='yes'", the
6369 * Name given in the entity reference must match that in an
6370 * entity declaration, except that well-formed documents
6371 * need not declare any of the following entities: amp, lt,
6372 * gt, apos, quot.
6373 * The declaration of a parameter entity must precede any
6374 * reference to it.
6375 * Similarly, the declaration of a general entity must
6376 * precede any reference to it which appears in a default
6377 * value in an attribute-list declaration. Note that if
6378 * entities are declared in the external subset or in
6379 * external parameter entities, a non-validating processor
6380 * is not obligated to read and process their declarations;
6381 * for such documents, the rule that an entity must be
6382 * declared is a well-formedness constraint only if
6383 * standalone='yes'.
6384 */
6385 if (ent == NULL) {
6386 if ((ctxt->standalone == 1) ||
6387 ((ctxt->hasExternalSubset == 0) &&
6388 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006389 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006390 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006391 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006392 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006393 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006394 if ((ctxt->inSubset == 0) &&
6395 (ctxt->sax != NULL) &&
6396 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006397 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006398 }
Owen Taylor3473f882001-02-23 17:55:21 +00006399 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
6402
6403 /*
6404 * [ WFC: Parsed Entity ]
6405 * An entity reference must not contain the name of an
6406 * unparsed entity
6407 */
6408 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006409 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006410 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006411 }
6412
6413 /*
6414 * [ WFC: No External Entity References ]
6415 * Attribute values cannot contain direct or indirect
6416 * entity references to external entities.
6417 */
6418 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6419 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006420 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6421 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
6423 /*
6424 * [ WFC: No < in Attribute Values ]
6425 * The replacement text of any entity referred to directly or
6426 * indirectly in an attribute value (other than "&lt;") must
6427 * not contain a <.
6428 */
6429 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6430 (ent != NULL) &&
6431 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6432 (ent->content != NULL) &&
6433 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006434 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006435 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006436 }
6437
6438 /*
6439 * Internal check, no parameter entities here ...
6440 */
6441 else {
6442 switch (ent->etype) {
6443 case XML_INTERNAL_PARAMETER_ENTITY:
6444 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006445 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6446 "Attempt to reference the parameter entity '%s'\n",
6447 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006448 break;
6449 default:
6450 break;
6451 }
6452 }
6453
6454 /*
6455 * [ WFC: No Recursion ]
6456 * A parsed entity must not contain a recursive reference
6457 * to itself, either directly or indirectly.
6458 * Done somewhere else
6459 */
6460
6461 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006462 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 }
6465 }
6466 return(ent);
6467}
6468
6469/**
6470 * xmlParseStringEntityRef:
6471 * @ctxt: an XML parser context
6472 * @str: a pointer to an index in the string
6473 *
6474 * parse ENTITY references declarations, but this version parses it from
6475 * a string value.
6476 *
6477 * [68] EntityRef ::= '&' Name ';'
6478 *
6479 * [ WFC: Entity Declared ]
6480 * In a document without any DTD, a document with only an internal DTD
6481 * subset which contains no parameter entity references, or a document
6482 * with "standalone='yes'", the Name given in the entity reference
6483 * must match that in an entity declaration, except that well-formed
6484 * documents need not declare any of the following entities: amp, lt,
6485 * gt, apos, quot. The declaration of a parameter entity must precede
6486 * any reference to it. Similarly, the declaration of a general entity
6487 * must precede any reference to it which appears in a default value in an
6488 * attribute-list declaration. Note that if entities are declared in the
6489 * external subset or in external parameter entities, a non-validating
6490 * processor is not obligated to read and process their declarations;
6491 * for such documents, the rule that an entity must be declared is a
6492 * well-formedness constraint only if standalone='yes'.
6493 *
6494 * [ WFC: Parsed Entity ]
6495 * An entity reference must not contain the name of an unparsed entity
6496 *
6497 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6498 * is updated to the current location in the string.
6499 */
6500xmlEntityPtr
6501xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6502 xmlChar *name;
6503 const xmlChar *ptr;
6504 xmlChar cur;
6505 xmlEntityPtr ent = NULL;
6506
6507 if ((str == NULL) || (*str == NULL))
6508 return(NULL);
6509 ptr = *str;
6510 cur = *ptr;
6511 if (cur == '&') {
6512 ptr++;
6513 cur = *ptr;
6514 name = xmlParseStringName(ctxt, &ptr);
6515 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6517 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006518 } else {
6519 if (*ptr == ';') {
6520 ptr++;
6521 /*
6522 * Ask first SAX for entity resolution, otherwise try the
6523 * predefined set.
6524 */
6525 if (ctxt->sax != NULL) {
6526 if (ctxt->sax->getEntity != NULL)
6527 ent = ctxt->sax->getEntity(ctxt->userData, name);
6528 if (ent == NULL)
6529 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006530 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006531 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006532 }
Owen Taylor3473f882001-02-23 17:55:21 +00006533 }
6534 /*
6535 * [ WFC: Entity Declared ]
6536 * In a document without any DTD, a document with only an
6537 * internal DTD subset which contains no parameter entity
6538 * references, or a document with "standalone='yes'", the
6539 * Name given in the entity reference must match that in an
6540 * entity declaration, except that well-formed documents
6541 * need not declare any of the following entities: amp, lt,
6542 * gt, apos, quot.
6543 * The declaration of a parameter entity must precede any
6544 * reference to it.
6545 * Similarly, the declaration of a general entity must
6546 * precede any reference to it which appears in a default
6547 * value in an attribute-list declaration. Note that if
6548 * entities are declared in the external subset or in
6549 * external parameter entities, a non-validating processor
6550 * is not obligated to read and process their declarations;
6551 * for such documents, the rule that an entity must be
6552 * declared is a well-formedness constraint only if
6553 * standalone='yes'.
6554 */
6555 if (ent == NULL) {
6556 if ((ctxt->standalone == 1) ||
6557 ((ctxt->hasExternalSubset == 0) &&
6558 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006559 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006560 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006563 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006564 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006566 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568
6569 /*
6570 * [ WFC: Parsed Entity ]
6571 * An entity reference must not contain the name of an
6572 * unparsed entity
6573 */
6574 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006575 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006576 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 }
6578
6579 /*
6580 * [ WFC: No External Entity References ]
6581 * Attribute values cannot contain direct or indirect
6582 * entity references to external entities.
6583 */
6584 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006586 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006587 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589 /*
6590 * [ WFC: No < in Attribute Values ]
6591 * The replacement text of any entity referred to directly or
6592 * indirectly in an attribute value (other than "&lt;") must
6593 * not contain a <.
6594 */
6595 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6596 (ent != NULL) &&
6597 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6598 (ent->content != NULL) &&
6599 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006600 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6601 "'<' in entity '%s' is not allowed in attributes values\n",
6602 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604
6605 /*
6606 * Internal check, no parameter entities here ...
6607 */
6608 else {
6609 switch (ent->etype) {
6610 case XML_INTERNAL_PARAMETER_ENTITY:
6611 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006612 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6613 "Attempt to reference the parameter entity '%s'\n",
6614 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006615 break;
6616 default:
6617 break;
6618 }
6619 }
6620
6621 /*
6622 * [ WFC: No Recursion ]
6623 * A parsed entity must not contain a recursive reference
6624 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006625 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006626 */
6627
6628 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006629 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006630 }
6631 xmlFree(name);
6632 }
6633 }
6634 *str = ptr;
6635 return(ent);
6636}
6637
6638/**
6639 * xmlParsePEReference:
6640 * @ctxt: an XML parser context
6641 *
6642 * parse PEReference declarations
6643 * The entity content is handled directly by pushing it's content as
6644 * a new input stream.
6645 *
6646 * [69] PEReference ::= '%' Name ';'
6647 *
6648 * [ WFC: No Recursion ]
6649 * A parsed entity must not contain a recursive
6650 * reference to itself, either directly or indirectly.
6651 *
6652 * [ WFC: Entity Declared ]
6653 * In a document without any DTD, a document with only an internal DTD
6654 * subset which contains no parameter entity references, or a document
6655 * with "standalone='yes'", ... ... The declaration of a parameter
6656 * entity must precede any reference to it...
6657 *
6658 * [ VC: Entity Declared ]
6659 * In a document with an external subset or external parameter entities
6660 * with "standalone='no'", ... ... The declaration of a parameter entity
6661 * must precede any reference to it...
6662 *
6663 * [ WFC: In DTD ]
6664 * Parameter-entity references may only appear in the DTD.
6665 * NOTE: misleading but this is handled.
6666 */
6667void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006668xmlParsePEReference(xmlParserCtxtPtr ctxt)
6669{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006670 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006671 xmlEntityPtr entity = NULL;
6672 xmlParserInputPtr input;
6673
6674 if (RAW == '%') {
6675 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006676 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006677 if (name == NULL) {
6678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6679 "xmlParsePEReference: no name\n");
6680 } else {
6681 if (RAW == ';') {
6682 NEXT;
6683 if ((ctxt->sax != NULL) &&
6684 (ctxt->sax->getParameterEntity != NULL))
6685 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6686 name);
6687 if (entity == NULL) {
6688 /*
6689 * [ WFC: Entity Declared ]
6690 * In a document without any DTD, a document with only an
6691 * internal DTD subset which contains no parameter entity
6692 * references, or a document with "standalone='yes'", ...
6693 * ... The declaration of a parameter entity must precede
6694 * any reference to it...
6695 */
6696 if ((ctxt->standalone == 1) ||
6697 ((ctxt->hasExternalSubset == 0) &&
6698 (ctxt->hasPErefs == 0))) {
6699 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6700 "PEReference: %%%s; not found\n",
6701 name);
6702 } else {
6703 /*
6704 * [ VC: Entity Declared ]
6705 * In a document with an external subset or external
6706 * parameter entities with "standalone='no'", ...
6707 * ... The declaration of a parameter entity must
6708 * precede any reference to it...
6709 */
6710 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6711 "PEReference: %%%s; not found\n",
6712 name, NULL);
6713 ctxt->valid = 0;
6714 }
6715 } else {
6716 /*
6717 * Internal checking in case the entity quest barfed
6718 */
6719 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6720 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6721 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6722 "Internal: %%%s; is not a parameter entity\n",
6723 name, NULL);
6724 } else if (ctxt->input->free != deallocblankswrapper) {
6725 input =
6726 xmlNewBlanksWrapperInputStream(ctxt, entity);
6727 xmlPushInput(ctxt, input);
6728 } else {
6729 /*
6730 * TODO !!!
6731 * handle the extra spaces added before and after
6732 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6733 */
6734 input = xmlNewEntityInputStream(ctxt, entity);
6735 xmlPushInput(ctxt, input);
6736 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006737 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006738 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006739 xmlParseTextDecl(ctxt);
6740 if (ctxt->errNo ==
6741 XML_ERR_UNSUPPORTED_ENCODING) {
6742 /*
6743 * The XML REC instructs us to stop parsing
6744 * right here
6745 */
6746 ctxt->instate = XML_PARSER_EOF;
6747 return;
6748 }
6749 }
6750 }
6751 }
6752 ctxt->hasPErefs = 1;
6753 } else {
6754 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6755 }
6756 }
Owen Taylor3473f882001-02-23 17:55:21 +00006757 }
6758}
6759
6760/**
6761 * xmlParseStringPEReference:
6762 * @ctxt: an XML parser context
6763 * @str: a pointer to an index in the string
6764 *
6765 * parse PEReference declarations
6766 *
6767 * [69] PEReference ::= '%' Name ';'
6768 *
6769 * [ WFC: No Recursion ]
6770 * A parsed entity must not contain a recursive
6771 * reference to itself, either directly or indirectly.
6772 *
6773 * [ WFC: Entity Declared ]
6774 * In a document without any DTD, a document with only an internal DTD
6775 * subset which contains no parameter entity references, or a document
6776 * with "standalone='yes'", ... ... The declaration of a parameter
6777 * entity must precede any reference to it...
6778 *
6779 * [ VC: Entity Declared ]
6780 * In a document with an external subset or external parameter entities
6781 * with "standalone='no'", ... ... The declaration of a parameter entity
6782 * must precede any reference to it...
6783 *
6784 * [ WFC: In DTD ]
6785 * Parameter-entity references may only appear in the DTD.
6786 * NOTE: misleading but this is handled.
6787 *
6788 * Returns the string of the entity content.
6789 * str is updated to the current value of the index
6790 */
6791xmlEntityPtr
6792xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6793 const xmlChar *ptr;
6794 xmlChar cur;
6795 xmlChar *name;
6796 xmlEntityPtr entity = NULL;
6797
6798 if ((str == NULL) || (*str == NULL)) return(NULL);
6799 ptr = *str;
6800 cur = *ptr;
6801 if (cur == '%') {
6802 ptr++;
6803 cur = *ptr;
6804 name = xmlParseStringName(ctxt, &ptr);
6805 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6807 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006808 } else {
6809 cur = *ptr;
6810 if (cur == ';') {
6811 ptr++;
6812 cur = *ptr;
6813 if ((ctxt->sax != NULL) &&
6814 (ctxt->sax->getParameterEntity != NULL))
6815 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6816 name);
6817 if (entity == NULL) {
6818 /*
6819 * [ WFC: Entity Declared ]
6820 * In a document without any DTD, a document with only an
6821 * internal DTD subset which contains no parameter entity
6822 * references, or a document with "standalone='yes'", ...
6823 * ... The declaration of a parameter entity must precede
6824 * any reference to it...
6825 */
6826 if ((ctxt->standalone == 1) ||
6827 ((ctxt->hasExternalSubset == 0) &&
6828 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006829 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006830 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 } else {
6832 /*
6833 * [ VC: Entity Declared ]
6834 * In a document with an external subset or external
6835 * parameter entities with "standalone='no'", ...
6836 * ... The declaration of a parameter entity must
6837 * precede any reference to it...
6838 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6840 "PEReference: %%%s; not found\n",
6841 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006842 ctxt->valid = 0;
6843 }
6844 } else {
6845 /*
6846 * Internal checking in case the entity quest barfed
6847 */
6848 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6849 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006850 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6851 "%%%s; is not a parameter entity\n",
6852 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006853 }
6854 }
6855 ctxt->hasPErefs = 1;
6856 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006857 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859 xmlFree(name);
6860 }
6861 }
6862 *str = ptr;
6863 return(entity);
6864}
6865
6866/**
6867 * xmlParseDocTypeDecl:
6868 * @ctxt: an XML parser context
6869 *
6870 * parse a DOCTYPE declaration
6871 *
6872 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6873 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6874 *
6875 * [ VC: Root Element Type ]
6876 * The Name in the document type declaration must match the element
6877 * type of the root element.
6878 */
6879
6880void
6881xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006882 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 xmlChar *ExternalID = NULL;
6884 xmlChar *URI = NULL;
6885
6886 /*
6887 * We know that '<!DOCTYPE' has been detected.
6888 */
6889 SKIP(9);
6890
6891 SKIP_BLANKS;
6892
6893 /*
6894 * Parse the DOCTYPE name.
6895 */
6896 name = xmlParseName(ctxt);
6897 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6899 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006900 }
6901 ctxt->intSubName = name;
6902
6903 SKIP_BLANKS;
6904
6905 /*
6906 * Check for SystemID and ExternalID
6907 */
6908 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6909
6910 if ((URI != NULL) || (ExternalID != NULL)) {
6911 ctxt->hasExternalSubset = 1;
6912 }
6913 ctxt->extSubURI = URI;
6914 ctxt->extSubSystem = ExternalID;
6915
6916 SKIP_BLANKS;
6917
6918 /*
6919 * Create and update the internal subset.
6920 */
6921 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6922 (!ctxt->disableSAX))
6923 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6924
6925 /*
6926 * Is there any internal subset declarations ?
6927 * they are handled separately in xmlParseInternalSubset()
6928 */
6929 if (RAW == '[')
6930 return;
6931
6932 /*
6933 * We should be at the end of the DOCTYPE declaration.
6934 */
6935 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006936 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006937 }
6938 NEXT;
6939}
6940
6941/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006942 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006943 * @ctxt: an XML parser context
6944 *
6945 * parse the internal subset declaration
6946 *
6947 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6948 */
6949
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006950static void
Owen Taylor3473f882001-02-23 17:55:21 +00006951xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6952 /*
6953 * Is there any DTD definition ?
6954 */
6955 if (RAW == '[') {
6956 ctxt->instate = XML_PARSER_DTD;
6957 NEXT;
6958 /*
6959 * Parse the succession of Markup declarations and
6960 * PEReferences.
6961 * Subsequence (markupdecl | PEReference | S)*
6962 */
6963 while (RAW != ']') {
6964 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006965 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006966
6967 SKIP_BLANKS;
6968 xmlParseMarkupDecl(ctxt);
6969 xmlParsePEReference(ctxt);
6970
6971 /*
6972 * Pop-up of finished entities.
6973 */
6974 while ((RAW == 0) && (ctxt->inputNr > 1))
6975 xmlPopInput(ctxt);
6976
6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006978 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006979 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006980 break;
6981 }
6982 }
6983 if (RAW == ']') {
6984 NEXT;
6985 SKIP_BLANKS;
6986 }
6987 }
6988
6989 /*
6990 * We should be at the end of the DOCTYPE declaration.
6991 */
6992 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006993 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006994 }
6995 NEXT;
6996}
6997
Daniel Veillard81273902003-09-30 00:43:48 +00006998#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006999/**
7000 * xmlParseAttribute:
7001 * @ctxt: an XML parser context
7002 * @value: a xmlChar ** used to store the value of the attribute
7003 *
7004 * parse an attribute
7005 *
7006 * [41] Attribute ::= Name Eq AttValue
7007 *
7008 * [ WFC: No External Entity References ]
7009 * Attribute values cannot contain direct or indirect entity references
7010 * to external entities.
7011 *
7012 * [ WFC: No < in Attribute Values ]
7013 * The replacement text of any entity referred to directly or indirectly in
7014 * an attribute value (other than "&lt;") must not contain a <.
7015 *
7016 * [ VC: Attribute Value Type ]
7017 * The attribute must have been declared; the value must be of the type
7018 * declared for it.
7019 *
7020 * [25] Eq ::= S? '=' S?
7021 *
7022 * With namespace:
7023 *
7024 * [NS 11] Attribute ::= QName Eq AttValue
7025 *
7026 * Also the case QName == xmlns:??? is handled independently as a namespace
7027 * definition.
7028 *
7029 * Returns the attribute name, and the value in *value.
7030 */
7031
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007032const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007033xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007034 const xmlChar *name;
7035 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007036
7037 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007038 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007039 name = xmlParseName(ctxt);
7040 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007042 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007043 return(NULL);
7044 }
7045
7046 /*
7047 * read the value
7048 */
7049 SKIP_BLANKS;
7050 if (RAW == '=') {
7051 NEXT;
7052 SKIP_BLANKS;
7053 val = xmlParseAttValue(ctxt);
7054 ctxt->instate = XML_PARSER_CONTENT;
7055 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007056 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007057 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007058 return(NULL);
7059 }
7060
7061 /*
7062 * Check that xml:lang conforms to the specification
7063 * No more registered as an error, just generate a warning now
7064 * since this was deprecated in XML second edition
7065 */
7066 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7067 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007068 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7069 "Malformed value for xml:lang : %s\n",
7070 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007071 }
7072 }
7073
7074 /*
7075 * Check that xml:space conforms to the specification
7076 */
7077 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7078 if (xmlStrEqual(val, BAD_CAST "default"))
7079 *(ctxt->space) = 0;
7080 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7081 *(ctxt->space) = 1;
7082 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007083 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007084"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007085 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007086 }
7087 }
7088
7089 *value = val;
7090 return(name);
7091}
7092
7093/**
7094 * xmlParseStartTag:
7095 * @ctxt: an XML parser context
7096 *
7097 * parse a start of tag either for rule element or
7098 * EmptyElement. In both case we don't parse the tag closing chars.
7099 *
7100 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7101 *
7102 * [ WFC: Unique Att Spec ]
7103 * No attribute name may appear more than once in the same start-tag or
7104 * empty-element tag.
7105 *
7106 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7107 *
7108 * [ WFC: Unique Att Spec ]
7109 * No attribute name may appear more than once in the same start-tag or
7110 * empty-element tag.
7111 *
7112 * With namespace:
7113 *
7114 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7115 *
7116 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7117 *
7118 * Returns the element name parsed
7119 */
7120
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007121const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007122xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007123 const xmlChar *name;
7124 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007125 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007126 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007128 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007129 int i;
7130
7131 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007132 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007133
7134 name = xmlParseName(ctxt);
7135 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007137 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007138 return(NULL);
7139 }
7140
7141 /*
7142 * Now parse the attributes, it ends up with the ending
7143 *
7144 * (S Attribute)* S?
7145 */
7146 SKIP_BLANKS;
7147 GROW;
7148
Daniel Veillard21a0f912001-02-25 19:54:14 +00007149 while ((RAW != '>') &&
7150 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007151 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007152 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007153 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007154
7155 attname = xmlParseAttribute(ctxt, &attvalue);
7156 if ((attname != NULL) && (attvalue != NULL)) {
7157 /*
7158 * [ WFC: Unique Att Spec ]
7159 * No attribute name may appear more than once in the same
7160 * start-tag or empty-element tag.
7161 */
7162 for (i = 0; i < nbatts;i += 2) {
7163 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007164 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 xmlFree(attvalue);
7166 goto failed;
7167 }
7168 }
Owen Taylor3473f882001-02-23 17:55:21 +00007169 /*
7170 * Add the pair to atts
7171 */
7172 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007173 maxatts = 22; /* allow for 10 attrs by default */
7174 atts = (const xmlChar **)
7175 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007176 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007177 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007178 if (attvalue != NULL)
7179 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007180 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007181 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007182 ctxt->atts = atts;
7183 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007184 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007185 const xmlChar **n;
7186
Owen Taylor3473f882001-02-23 17:55:21 +00007187 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007188 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007189 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007190 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007191 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 if (attvalue != NULL)
7193 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007194 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007195 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007196 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007197 ctxt->atts = atts;
7198 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007199 }
7200 atts[nbatts++] = attname;
7201 atts[nbatts++] = attvalue;
7202 atts[nbatts] = NULL;
7203 atts[nbatts + 1] = NULL;
7204 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007205 if (attvalue != NULL)
7206 xmlFree(attvalue);
7207 }
7208
7209failed:
7210
Daniel Veillard3772de32002-12-17 10:31:45 +00007211 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007212 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7213 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007214 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7216 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007217 }
7218 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007219 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7220 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007221 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7222 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007223 break;
7224 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007225 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007226 GROW;
7227 }
7228
7229 /*
7230 * SAX: Start of Element !
7231 */
7232 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007233 (!ctxt->disableSAX)) {
7234 if (nbatts > 0)
7235 ctxt->sax->startElement(ctxt->userData, name, atts);
7236 else
7237 ctxt->sax->startElement(ctxt->userData, name, NULL);
7238 }
Owen Taylor3473f882001-02-23 17:55:21 +00007239
7240 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007241 /* Free only the content strings */
7242 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007243 if (atts[i] != NULL)
7244 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007245 }
7246 return(name);
7247}
7248
7249/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007251 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252 * @line: line of the start tag
7253 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007254 *
7255 * parse an end of tag
7256 *
7257 * [42] ETag ::= '</' Name S? '>'
7258 *
7259 * With namespace
7260 *
7261 * [NS 9] ETag ::= '</' QName S? '>'
7262 */
7263
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007264static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007265xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007266 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007267
7268 GROW;
7269 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007270 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007271 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007272 return;
7273 }
7274 SKIP(2);
7275
Daniel Veillard46de64e2002-05-29 08:21:33 +00007276 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007277
7278 /*
7279 * We should definitely be at the ending "S? '>'" part
7280 */
7281 GROW;
7282 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007283 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007284 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007285 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007286 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007287
7288 /*
7289 * [ WFC: Element Type Match ]
7290 * The Name in an element's end-tag must match the element type in the
7291 * start-tag.
7292 *
7293 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007294 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007295 if (name == NULL) name = BAD_CAST "unparseable";
7296 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007297 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007298 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007299 }
7300
7301 /*
7302 * SAX: End of Tag
7303 */
7304 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7305 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007306 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007307
Daniel Veillarde57ec792003-09-10 10:50:59 +00007308 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007309 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007310 return;
7311}
7312
7313/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007314 * xmlParseEndTag:
7315 * @ctxt: an XML parser context
7316 *
7317 * parse an end of tag
7318 *
7319 * [42] ETag ::= '</' Name S? '>'
7320 *
7321 * With namespace
7322 *
7323 * [NS 9] ETag ::= '</' QName S? '>'
7324 */
7325
7326void
7327xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 xmlParseEndTag1(ctxt, 0);
7329}
Daniel Veillard81273902003-09-30 00:43:48 +00007330#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331
7332/************************************************************************
7333 * *
7334 * SAX 2 specific operations *
7335 * *
7336 ************************************************************************/
7337
7338static const xmlChar *
7339xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7340 int len = 0, l;
7341 int c;
7342 int count = 0;
7343
7344 /*
7345 * Handler for more complex cases
7346 */
7347 GROW;
7348 c = CUR_CHAR(l);
7349 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007350 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007351 return(NULL);
7352 }
7353
7354 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007355 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007356 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007357 (IS_COMBINING(c)) ||
7358 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007359 if (count++ > 100) {
7360 count = 0;
7361 GROW;
7362 }
7363 len += l;
7364 NEXTL(l);
7365 c = CUR_CHAR(l);
7366 }
7367 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7368}
7369
7370/*
7371 * xmlGetNamespace:
7372 * @ctxt: an XML parser context
7373 * @prefix: the prefix to lookup
7374 *
7375 * Lookup the namespace name for the @prefix (which ca be NULL)
7376 * The prefix must come from the @ctxt->dict dictionnary
7377 *
7378 * Returns the namespace name or NULL if not bound
7379 */
7380static const xmlChar *
7381xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7382 int i;
7383
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007385 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 if (ctxt->nsTab[i] == prefix) {
7387 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7388 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007390 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 return(NULL);
7392}
7393
7394/**
7395 * xmlParseNCName:
7396 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007397 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 *
7399 * parse an XML name.
7400 *
7401 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7402 * CombiningChar | Extender
7403 *
7404 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7405 *
7406 * Returns the Name parsed or NULL
7407 */
7408
7409static const xmlChar *
7410xmlParseNCName(xmlParserCtxtPtr ctxt) {
7411 const xmlChar *in;
7412 const xmlChar *ret;
7413 int count = 0;
7414
7415 /*
7416 * Accelerator for simple ASCII names
7417 */
7418 in = ctxt->input->cur;
7419 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7420 ((*in >= 0x41) && (*in <= 0x5A)) ||
7421 (*in == '_')) {
7422 in++;
7423 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7424 ((*in >= 0x41) && (*in <= 0x5A)) ||
7425 ((*in >= 0x30) && (*in <= 0x39)) ||
7426 (*in == '_') || (*in == '-') ||
7427 (*in == '.'))
7428 in++;
7429 if ((*in > 0) && (*in < 0x80)) {
7430 count = in - ctxt->input->cur;
7431 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7432 ctxt->input->cur = in;
7433 ctxt->nbChars += count;
7434 ctxt->input->col += count;
7435 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007436 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007437 }
7438 return(ret);
7439 }
7440 }
7441 return(xmlParseNCNameComplex(ctxt));
7442}
7443
7444/**
7445 * xmlParseQName:
7446 * @ctxt: an XML parser context
7447 * @prefix: pointer to store the prefix part
7448 *
7449 * parse an XML Namespace QName
7450 *
7451 * [6] QName ::= (Prefix ':')? LocalPart
7452 * [7] Prefix ::= NCName
7453 * [8] LocalPart ::= NCName
7454 *
7455 * Returns the Name parsed or NULL
7456 */
7457
7458static const xmlChar *
7459xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7460 const xmlChar *l, *p;
7461
7462 GROW;
7463
7464 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007465 if (l == NULL) {
7466 if (CUR == ':') {
7467 l = xmlParseName(ctxt);
7468 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7470 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007471 *prefix = NULL;
7472 return(l);
7473 }
7474 }
7475 return(NULL);
7476 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477 if (CUR == ':') {
7478 NEXT;
7479 p = l;
7480 l = xmlParseNCName(ctxt);
7481 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007482 xmlChar *tmp;
7483
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007484 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7485 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007486 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7487 p = xmlDictLookup(ctxt->dict, tmp, -1);
7488 if (tmp != NULL) xmlFree(tmp);
7489 *prefix = NULL;
7490 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007491 }
7492 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007493 xmlChar *tmp;
7494
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007495 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7496 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007497 NEXT;
7498 tmp = (xmlChar *) xmlParseName(ctxt);
7499 if (tmp != NULL) {
7500 tmp = xmlBuildQName(tmp, l, NULL, 0);
7501 l = xmlDictLookup(ctxt->dict, tmp, -1);
7502 if (tmp != NULL) xmlFree(tmp);
7503 *prefix = p;
7504 return(l);
7505 }
7506 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7507 l = xmlDictLookup(ctxt->dict, tmp, -1);
7508 if (tmp != NULL) xmlFree(tmp);
7509 *prefix = p;
7510 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511 }
7512 *prefix = p;
7513 } else
7514 *prefix = NULL;
7515 return(l);
7516}
7517
7518/**
7519 * xmlParseQNameAndCompare:
7520 * @ctxt: an XML parser context
7521 * @name: the localname
7522 * @prefix: the prefix, if any.
7523 *
7524 * parse an XML name and compares for match
7525 * (specialized for endtag parsing)
7526 *
7527 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7528 * and the name for mismatch
7529 */
7530
7531static const xmlChar *
7532xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7533 xmlChar const *prefix) {
7534 const xmlChar *cmp = name;
7535 const xmlChar *in;
7536 const xmlChar *ret;
7537 const xmlChar *prefix2;
7538
7539 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7540
7541 GROW;
7542 in = ctxt->input->cur;
7543
7544 cmp = prefix;
7545 while (*in != 0 && *in == *cmp) {
7546 ++in;
7547 ++cmp;
7548 }
7549 if ((*cmp == 0) && (*in == ':')) {
7550 in++;
7551 cmp = name;
7552 while (*in != 0 && *in == *cmp) {
7553 ++in;
7554 ++cmp;
7555 }
William M. Brack76e95df2003-10-18 16:20:14 +00007556 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 /* success */
7558 ctxt->input->cur = in;
7559 return((const xmlChar*) 1);
7560 }
7561 }
7562 /*
7563 * all strings coms from the dictionary, equality can be done directly
7564 */
7565 ret = xmlParseQName (ctxt, &prefix2);
7566 if ((ret == name) && (prefix == prefix2))
7567 return((const xmlChar*) 1);
7568 return ret;
7569}
7570
7571/**
7572 * xmlParseAttValueInternal:
7573 * @ctxt: an XML parser context
7574 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007575 * @alloc: whether the attribute was reallocated as a new string
7576 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577 *
7578 * parse a value for an attribute.
7579 * NOTE: if no normalization is needed, the routine will return pointers
7580 * directly from the data buffer.
7581 *
7582 * 3.3.3 Attribute-Value Normalization:
7583 * Before the value of an attribute is passed to the application or
7584 * checked for validity, the XML processor must normalize it as follows:
7585 * - a character reference is processed by appending the referenced
7586 * character to the attribute value
7587 * - an entity reference is processed by recursively processing the
7588 * replacement text of the entity
7589 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7590 * appending #x20 to the normalized value, except that only a single
7591 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7592 * parsed entity or the literal entity value of an internal parsed entity
7593 * - other characters are processed by appending them to the normalized value
7594 * If the declared value is not CDATA, then the XML processor must further
7595 * process the normalized attribute value by discarding any leading and
7596 * trailing space (#x20) characters, and by replacing sequences of space
7597 * (#x20) characters by a single space (#x20) character.
7598 * All attributes for which no declaration has been read should be treated
7599 * by a non-validating parser as if declared CDATA.
7600 *
7601 * Returns the AttValue parsed or NULL. The value has to be freed by the
7602 * caller if it was copied, this can be detected by val[*len] == 0.
7603 */
7604
7605static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007606xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7607 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007608{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007610 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 xmlChar *ret = NULL;
7612
7613 GROW;
7614 in = (xmlChar *) CUR_PTR;
7615 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007616 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007617 return (NULL);
7618 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007619 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007620
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007621 /*
7622 * try to handle in this routine the most common case where no
7623 * allocation of a new string is required and where content is
7624 * pure ASCII.
7625 */
7626 limit = *in++;
7627 end = ctxt->input->end;
7628 start = in;
7629 if (in >= end) {
7630 const xmlChar *oldbase = ctxt->input->base;
7631 GROW;
7632 if (oldbase != ctxt->input->base) {
7633 long delta = ctxt->input->base - oldbase;
7634 start = start + delta;
7635 in = in + delta;
7636 }
7637 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007639 if (normalize) {
7640 /*
7641 * Skip any leading spaces
7642 */
7643 while ((in < end) && (*in != limit) &&
7644 ((*in == 0x20) || (*in == 0x9) ||
7645 (*in == 0xA) || (*in == 0xD))) {
7646 in++;
7647 start = in;
7648 if (in >= end) {
7649 const xmlChar *oldbase = ctxt->input->base;
7650 GROW;
7651 if (oldbase != ctxt->input->base) {
7652 long delta = ctxt->input->base - oldbase;
7653 start = start + delta;
7654 in = in + delta;
7655 }
7656 end = ctxt->input->end;
7657 }
7658 }
7659 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7660 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7661 if ((*in++ == 0x20) && (*in == 0x20)) break;
7662 if (in >= end) {
7663 const xmlChar *oldbase = ctxt->input->base;
7664 GROW;
7665 if (oldbase != ctxt->input->base) {
7666 long delta = ctxt->input->base - oldbase;
7667 start = start + delta;
7668 in = in + delta;
7669 }
7670 end = ctxt->input->end;
7671 }
7672 }
7673 last = in;
7674 /*
7675 * skip the trailing blanks
7676 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007677 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007678 while ((in < end) && (*in != limit) &&
7679 ((*in == 0x20) || (*in == 0x9) ||
7680 (*in == 0xA) || (*in == 0xD))) {
7681 in++;
7682 if (in >= end) {
7683 const xmlChar *oldbase = ctxt->input->base;
7684 GROW;
7685 if (oldbase != ctxt->input->base) {
7686 long delta = ctxt->input->base - oldbase;
7687 start = start + delta;
7688 in = in + delta;
7689 last = last + delta;
7690 }
7691 end = ctxt->input->end;
7692 }
7693 }
7694 if (*in != limit) goto need_complex;
7695 } else {
7696 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7697 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7698 in++;
7699 if (in >= end) {
7700 const xmlChar *oldbase = ctxt->input->base;
7701 GROW;
7702 if (oldbase != ctxt->input->base) {
7703 long delta = ctxt->input->base - oldbase;
7704 start = start + delta;
7705 in = in + delta;
7706 }
7707 end = ctxt->input->end;
7708 }
7709 }
7710 last = in;
7711 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007713 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007715 *len = last - start;
7716 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007718 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007719 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 }
7721 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007722 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007724need_complex:
7725 if (alloc) *alloc = 1;
7726 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727}
7728
7729/**
7730 * xmlParseAttribute2:
7731 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007732 * @pref: the element prefix
7733 * @elem: the element name
7734 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007736 * @len: an int * to save the length of the attribute
7737 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007738 *
7739 * parse an attribute in the new SAX2 framework.
7740 *
7741 * Returns the attribute name, and the value in *value, .
7742 */
7743
7744static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007745xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7746 const xmlChar *pref, const xmlChar *elem,
7747 const xmlChar **prefix, xmlChar **value,
7748 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007750 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007751 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007752
7753 *value = NULL;
7754 GROW;
7755 name = xmlParseQName(ctxt, prefix);
7756 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7758 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759 return(NULL);
7760 }
7761
7762 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007763 * get the type if needed
7764 */
7765 if (ctxt->attsSpecial != NULL) {
7766 int type;
7767
7768 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7769 pref, elem, *prefix, name);
7770 if (type != 0) normalize = 1;
7771 }
7772
7773 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 * read the value
7775 */
7776 SKIP_BLANKS;
7777 if (RAW == '=') {
7778 NEXT;
7779 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007780 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 ctxt->instate = XML_PARSER_CONTENT;
7782 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007783 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 return(NULL);
7786 }
7787
Daniel Veillardd8925572005-06-08 22:34:55 +00007788 if (*prefix == ctxt->str_xml) {
7789 /*
7790 * Check that xml:lang conforms to the specification
7791 * No more registered as an error, just generate a warning now
7792 * since this was deprecated in XML second edition
7793 */
7794 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7795 internal_val = xmlStrndup(val, *len);
7796 if (!xmlCheckLanguageID(internal_val)) {
7797 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7798 "Malformed value for xml:lang : %s\n",
7799 internal_val, NULL);
7800 }
7801 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007802
Daniel Veillardd8925572005-06-08 22:34:55 +00007803 /*
7804 * Check that xml:space conforms to the specification
7805 */
7806 if (xmlStrEqual(name, BAD_CAST "space")) {
7807 internal_val = xmlStrndup(val, *len);
7808 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7809 *(ctxt->space) = 0;
7810 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7811 *(ctxt->space) = 1;
7812 else {
7813 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007814"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007815 internal_val, NULL);
7816 }
7817 }
7818 if (internal_val) {
7819 xmlFree(internal_val);
7820 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822
7823 *value = val;
7824 return(name);
7825}
7826
7827/**
7828 * xmlParseStartTag2:
7829 * @ctxt: an XML parser context
7830 *
7831 * parse a start of tag either for rule element or
7832 * EmptyElement. In both case we don't parse the tag closing chars.
7833 * This routine is called when running SAX2 parsing
7834 *
7835 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7836 *
7837 * [ WFC: Unique Att Spec ]
7838 * No attribute name may appear more than once in the same start-tag or
7839 * empty-element tag.
7840 *
7841 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7842 *
7843 * [ WFC: Unique Att Spec ]
7844 * No attribute name may appear more than once in the same start-tag or
7845 * empty-element tag.
7846 *
7847 * With namespace:
7848 *
7849 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7850 *
7851 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7852 *
7853 * Returns the element name parsed
7854 */
7855
7856static const xmlChar *
7857xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007858 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 const xmlChar *localname;
7860 const xmlChar *prefix;
7861 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007862 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 const xmlChar *nsname;
7864 xmlChar *attvalue;
7865 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007868 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007869 const xmlChar *base;
7870 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007871 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872
7873 if (RAW != '<') return(NULL);
7874 NEXT1;
7875
7876 /*
7877 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7878 * point since the attribute values may be stored as pointers to
7879 * the buffer and calling SHRINK would destroy them !
7880 * The Shrinking is only possible once the full set of attribute
7881 * callbacks have been done.
7882 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007884 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007885 base = ctxt->input->base;
7886 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007887 oldline = ctxt->input->line;
7888 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007889 nbatts = 0;
7890 nratts = 0;
7891 nbdef = 0;
7892 nbNs = 0;
7893 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007894 /* Forget any namespaces added during an earlier parse of this element. */
7895 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007896
7897 localname = xmlParseQName(ctxt, &prefix);
7898 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7900 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007901 return(NULL);
7902 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007903 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007904
7905 /*
7906 * Now parse the attributes, it ends up with the ending
7907 *
7908 * (S Attribute)* S?
7909 */
7910 SKIP_BLANKS;
7911 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007912 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913
7914 while ((RAW != '>') &&
7915 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007916 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007917 const xmlChar *q = CUR_PTR;
7918 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007919 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007921 attname = xmlParseAttribute2(ctxt, prefix, localname,
7922 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007923 if (ctxt->input->base != base) {
7924 if ((attvalue != NULL) && (alloc != 0))
7925 xmlFree(attvalue);
7926 attvalue = NULL;
7927 goto base_changed;
7928 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 if ((attname != NULL) && (attvalue != NULL)) {
7930 if (len < 0) len = xmlStrlen(attvalue);
7931 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007932 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7933 xmlURIPtr uri;
7934
7935 if (*URL != 0) {
7936 uri = xmlParseURI((const char *) URL);
7937 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007938 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7939 "xmlns: %s not a valid URI\n",
7940 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007941 } else {
7942 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007943 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7944 "xmlns: URI %s is not absolute\n",
7945 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007946 }
7947 xmlFreeURI(uri);
7948 }
7949 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007951 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007952 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007953 for (j = 1;j <= nbNs;j++)
7954 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7955 break;
7956 if (j <= nbNs)
7957 xmlErrAttributeDup(ctxt, NULL, attname);
7958 else
7959 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007960 if (alloc != 0) xmlFree(attvalue);
7961 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 continue;
7963 }
7964 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007965 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7966 xmlURIPtr uri;
7967
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007968 if (attname == ctxt->str_xml) {
7969 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007970 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7971 "xml namespace prefix mapped to wrong URI\n",
7972 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007973 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007974 /*
7975 * Do not keep a namespace definition node
7976 */
7977 if (alloc != 0) xmlFree(attvalue);
7978 SKIP_BLANKS;
7979 continue;
7980 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007981 uri = xmlParseURI((const char *) URL);
7982 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007983 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7984 "xmlns:%s: '%s' is not a valid URI\n",
7985 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007986 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007987 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007988 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7989 "xmlns:%s: URI %s is not absolute\n",
7990 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007991 }
7992 xmlFreeURI(uri);
7993 }
7994
Daniel Veillard0fb18932003-09-07 09:14:37 +00007995 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007996 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007997 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007998 for (j = 1;j <= nbNs;j++)
7999 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8000 break;
8001 if (j <= nbNs)
8002 xmlErrAttributeDup(ctxt, aprefix, attname);
8003 else
8004 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008005 if (alloc != 0) xmlFree(attvalue);
8006 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008007 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008008 continue;
8009 }
8010
8011 /*
8012 * Add the pair to atts
8013 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008014 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8015 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 if (attvalue[len] == 0)
8017 xmlFree(attvalue);
8018 goto failed;
8019 }
8020 maxatts = ctxt->maxatts;
8021 atts = ctxt->atts;
8022 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008023 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008024 atts[nbatts++] = attname;
8025 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008026 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008027 atts[nbatts++] = attvalue;
8028 attvalue += len;
8029 atts[nbatts++] = attvalue;
8030 /*
8031 * tag if some deallocation is needed
8032 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008033 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 } else {
8035 if ((attvalue != NULL) && (attvalue[len] == 0))
8036 xmlFree(attvalue);
8037 }
8038
8039failed:
8040
8041 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008042 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008043 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8044 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008045 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8047 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008048 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 }
8050 SKIP_BLANKS;
8051 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8052 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008053 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008054 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008055 break;
8056 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008057 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008058 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008059 }
8060
Daniel Veillard0fb18932003-09-07 09:14:37 +00008061 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008062 * The attributes defaulting
8063 */
8064 if (ctxt->attsDefault != NULL) {
8065 xmlDefAttrsPtr defaults;
8066
8067 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8068 if (defaults != NULL) {
8069 for (i = 0;i < defaults->nbAttrs;i++) {
8070 attname = defaults->values[4 * i];
8071 aprefix = defaults->values[4 * i + 1];
8072
8073 /*
8074 * special work for namespaces defaulted defs
8075 */
8076 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8077 /*
8078 * check that it's not a defined namespace
8079 */
8080 for (j = 1;j <= nbNs;j++)
8081 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8082 break;
8083 if (j <= nbNs) continue;
8084
8085 nsname = xmlGetNamespace(ctxt, NULL);
8086 if (nsname != defaults->values[4 * i + 2]) {
8087 if (nsPush(ctxt, NULL,
8088 defaults->values[4 * i + 2]) > 0)
8089 nbNs++;
8090 }
8091 } else if (aprefix == ctxt->str_xmlns) {
8092 /*
8093 * check that it's not a defined namespace
8094 */
8095 for (j = 1;j <= nbNs;j++)
8096 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8097 break;
8098 if (j <= nbNs) continue;
8099
8100 nsname = xmlGetNamespace(ctxt, attname);
8101 if (nsname != defaults->values[2]) {
8102 if (nsPush(ctxt, attname,
8103 defaults->values[4 * i + 2]) > 0)
8104 nbNs++;
8105 }
8106 } else {
8107 /*
8108 * check that it's not a defined attribute
8109 */
8110 for (j = 0;j < nbatts;j+=5) {
8111 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8112 break;
8113 }
8114 if (j < nbatts) continue;
8115
8116 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8117 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008118 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008119 }
8120 maxatts = ctxt->maxatts;
8121 atts = ctxt->atts;
8122 }
8123 atts[nbatts++] = attname;
8124 atts[nbatts++] = aprefix;
8125 if (aprefix == NULL)
8126 atts[nbatts++] = NULL;
8127 else
8128 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8129 atts[nbatts++] = defaults->values[4 * i + 2];
8130 atts[nbatts++] = defaults->values[4 * i + 3];
8131 nbdef++;
8132 }
8133 }
8134 }
8135 }
8136
Daniel Veillarde70c8772003-11-25 07:21:18 +00008137 /*
8138 * The attributes checkings
8139 */
8140 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008141 /*
8142 * The default namespace does not apply to attribute names.
8143 */
8144 if (atts[i + 1] != NULL) {
8145 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8146 if (nsname == NULL) {
8147 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8148 "Namespace prefix %s for %s on %s is not defined\n",
8149 atts[i + 1], atts[i], localname);
8150 }
8151 atts[i + 2] = nsname;
8152 } else
8153 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008154 /*
8155 * [ WFC: Unique Att Spec ]
8156 * No attribute name may appear more than once in the same
8157 * start-tag or empty-element tag.
8158 * As extended by the Namespace in XML REC.
8159 */
8160 for (j = 0; j < i;j += 5) {
8161 if (atts[i] == atts[j]) {
8162 if (atts[i+1] == atts[j+1]) {
8163 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8164 break;
8165 }
8166 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8167 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8168 "Namespaced Attribute %s in '%s' redefined\n",
8169 atts[i], nsname, NULL);
8170 break;
8171 }
8172 }
8173 }
8174 }
8175
Daniel Veillarde57ec792003-09-10 10:50:59 +00008176 nsname = xmlGetNamespace(ctxt, prefix);
8177 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008178 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8179 "Namespace prefix %s on %s is not defined\n",
8180 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008181 }
8182 *pref = prefix;
8183 *URI = nsname;
8184
8185 /*
8186 * SAX: Start of Element !
8187 */
8188 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8189 (!ctxt->disableSAX)) {
8190 if (nbNs > 0)
8191 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8192 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8193 nbatts / 5, nbdef, atts);
8194 else
8195 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8196 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8197 }
8198
8199 /*
8200 * Free up attribute allocated strings if needed
8201 */
8202 if (attval != 0) {
8203 for (i = 3,j = 0; j < nratts;i += 5,j++)
8204 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8205 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008206 }
8207
8208 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008209
8210base_changed:
8211 /*
8212 * the attribute strings are valid iif the base didn't changed
8213 */
8214 if (attval != 0) {
8215 for (i = 3,j = 0; j < nratts;i += 5,j++)
8216 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8217 xmlFree((xmlChar *) atts[i]);
8218 }
8219 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008220 ctxt->input->line = oldline;
8221 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008222 if (ctxt->wellFormed == 1) {
8223 goto reparse;
8224 }
8225 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008226}
8227
8228/**
8229 * xmlParseEndTag2:
8230 * @ctxt: an XML parser context
8231 * @line: line of the start tag
8232 * @nsNr: number of namespaces on the start tag
8233 *
8234 * parse an end of tag
8235 *
8236 * [42] ETag ::= '</' Name S? '>'
8237 *
8238 * With namespace
8239 *
8240 * [NS 9] ETag ::= '</' QName S? '>'
8241 */
8242
8243static void
8244xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008245 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246 const xmlChar *name;
8247
8248 GROW;
8249 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008250 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008251 return;
8252 }
8253 SKIP(2);
8254
William M. Brack13dfa872004-09-18 04:52:08 +00008255 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008256 if (ctxt->input->cur[tlen] == '>') {
8257 ctxt->input->cur += tlen + 1;
8258 goto done;
8259 }
8260 ctxt->input->cur += tlen;
8261 name = (xmlChar*)1;
8262 } else {
8263 if (prefix == NULL)
8264 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8265 else
8266 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8267 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268
8269 /*
8270 * We should definitely be at the ending "S? '>'" part
8271 */
8272 GROW;
8273 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008274 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008275 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 } else
8277 NEXT1;
8278
8279 /*
8280 * [ WFC: Element Type Match ]
8281 * The Name in an element's end-tag must match the element type in the
8282 * start-tag.
8283 *
8284 */
8285 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008286 if (name == NULL) name = BAD_CAST "unparseable";
8287 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008289 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 }
8291
8292 /*
8293 * SAX: End of Tag
8294 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008295done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008296 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8297 (!ctxt->disableSAX))
8298 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8299
Daniel Veillard0fb18932003-09-07 09:14:37 +00008300 spacePop(ctxt);
8301 if (nsNr != 0)
8302 nsPop(ctxt, nsNr);
8303 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008304}
8305
8306/**
Owen Taylor3473f882001-02-23 17:55:21 +00008307 * xmlParseCDSect:
8308 * @ctxt: an XML parser context
8309 *
8310 * Parse escaped pure raw content.
8311 *
8312 * [18] CDSect ::= CDStart CData CDEnd
8313 *
8314 * [19] CDStart ::= '<![CDATA['
8315 *
8316 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8317 *
8318 * [21] CDEnd ::= ']]>'
8319 */
8320void
8321xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8322 xmlChar *buf = NULL;
8323 int len = 0;
8324 int size = XML_PARSER_BUFFER_SIZE;
8325 int r, rl;
8326 int s, sl;
8327 int cur, l;
8328 int count = 0;
8329
Daniel Veillard8f597c32003-10-06 08:19:27 +00008330 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008331 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008332 SKIP(9);
8333 } else
8334 return;
8335
8336 ctxt->instate = XML_PARSER_CDATA_SECTION;
8337 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008338 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008339 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008340 ctxt->instate = XML_PARSER_CONTENT;
8341 return;
8342 }
8343 NEXTL(rl);
8344 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008345 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008346 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008347 ctxt->instate = XML_PARSER_CONTENT;
8348 return;
8349 }
8350 NEXTL(sl);
8351 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008352 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008353 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008354 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008355 return;
8356 }
William M. Brack871611b2003-10-18 04:53:14 +00008357 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008358 ((r != ']') || (s != ']') || (cur != '>'))) {
8359 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008360 xmlChar *tmp;
8361
Owen Taylor3473f882001-02-23 17:55:21 +00008362 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008363 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8364 if (tmp == NULL) {
8365 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008366 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 return;
8368 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008369 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008370 }
8371 COPY_BUF(rl,buf,len,r);
8372 r = s;
8373 rl = sl;
8374 s = cur;
8375 sl = l;
8376 count++;
8377 if (count > 50) {
8378 GROW;
8379 count = 0;
8380 }
8381 NEXTL(l);
8382 cur = CUR_CHAR(l);
8383 }
8384 buf[len] = 0;
8385 ctxt->instate = XML_PARSER_CONTENT;
8386 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008387 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008388 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 xmlFree(buf);
8390 return;
8391 }
8392 NEXTL(l);
8393
8394 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008395 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008396 */
8397 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8398 if (ctxt->sax->cdataBlock != NULL)
8399 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008400 else if (ctxt->sax->characters != NULL)
8401 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008402 }
8403 xmlFree(buf);
8404}
8405
8406/**
8407 * xmlParseContent:
8408 * @ctxt: an XML parser context
8409 *
8410 * Parse a content:
8411 *
8412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8413 */
8414
8415void
8416xmlParseContent(xmlParserCtxtPtr ctxt) {
8417 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008418 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008419 ((RAW != '<') || (NXT(1) != '/')) &&
8420 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008421 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008422 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008423 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008424
8425 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008426 * First case : a Processing Instruction.
8427 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008428 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008429 xmlParsePI(ctxt);
8430 }
8431
8432 /*
8433 * Second case : a CDSection
8434 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008435 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008436 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008437 xmlParseCDSect(ctxt);
8438 }
8439
8440 /*
8441 * Third case : a comment
8442 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008443 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008444 (NXT(2) == '-') && (NXT(3) == '-')) {
8445 xmlParseComment(ctxt);
8446 ctxt->instate = XML_PARSER_CONTENT;
8447 }
8448
8449 /*
8450 * Fourth case : a sub-element.
8451 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008452 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008453 xmlParseElement(ctxt);
8454 }
8455
8456 /*
8457 * Fifth case : a reference. If if has not been resolved,
8458 * parsing returns it's Name, create the node
8459 */
8460
Daniel Veillard21a0f912001-02-25 19:54:14 +00008461 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008462 xmlParseReference(ctxt);
8463 }
8464
8465 /*
8466 * Last case, text. Note that References are handled directly.
8467 */
8468 else {
8469 xmlParseCharData(ctxt, 0);
8470 }
8471
8472 GROW;
8473 /*
8474 * Pop-up of finished entities.
8475 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008476 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008477 xmlPopInput(ctxt);
8478 SHRINK;
8479
Daniel Veillardfdc91562002-07-01 21:52:03 +00008480 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008481 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008483 ctxt->instate = XML_PARSER_EOF;
8484 break;
8485 }
8486 }
8487}
8488
8489/**
8490 * xmlParseElement:
8491 * @ctxt: an XML parser context
8492 *
8493 * parse an XML element, this is highly recursive
8494 *
8495 * [39] element ::= EmptyElemTag | STag content ETag
8496 *
8497 * [ WFC: Element Type Match ]
8498 * The Name in an element's end-tag must match the element type in the
8499 * start-tag.
8500 *
Owen Taylor3473f882001-02-23 17:55:21 +00008501 */
8502
8503void
8504xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008505 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008506 const xmlChar *prefix;
8507 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008508 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008509 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008511 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008512
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008513 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8514 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8515 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8516 xmlParserMaxDepth);
8517 ctxt->instate = XML_PARSER_EOF;
8518 return;
8519 }
8520
Owen Taylor3473f882001-02-23 17:55:21 +00008521 /* Capture start position */
8522 if (ctxt->record_info) {
8523 node_info.begin_pos = ctxt->input->consumed +
8524 (CUR_PTR - ctxt->input->base);
8525 node_info.begin_line = ctxt->input->line;
8526 }
8527
8528 if (ctxt->spaceNr == 0)
8529 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008530 else if (*ctxt->space == -2)
8531 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008532 else
8533 spacePush(ctxt, *ctxt->space);
8534
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008535 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008536#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008537 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008538#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008539 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008540#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 else
8542 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008543#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008544 if (name == NULL) {
8545 spacePop(ctxt);
8546 return;
8547 }
8548 namePush(ctxt, name);
8549 ret = ctxt->node;
8550
Daniel Veillard4432df22003-09-28 18:58:27 +00008551#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008552 /*
8553 * [ VC: Root Element Type ]
8554 * The Name in the document type declaration must match the element
8555 * type of the root element.
8556 */
8557 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8558 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8559 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008560#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008561
8562 /*
8563 * Check for an Empty Element.
8564 */
8565 if ((RAW == '/') && (NXT(1) == '>')) {
8566 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 if (ctxt->sax2) {
8568 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8569 (!ctxt->disableSAX))
8570 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008571#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008572 } else {
8573 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8574 (!ctxt->disableSAX))
8575 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008576#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008577 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008578 namePop(ctxt);
8579 spacePop(ctxt);
8580 if (nsNr != ctxt->nsNr)
8581 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008582 if ( ret != NULL && ctxt->record_info ) {
8583 node_info.end_pos = ctxt->input->consumed +
8584 (CUR_PTR - ctxt->input->base);
8585 node_info.end_line = ctxt->input->line;
8586 node_info.node = ret;
8587 xmlParserAddNodeInfo(ctxt, &node_info);
8588 }
8589 return;
8590 }
8591 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008592 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008593 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008594 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8595 "Couldn't find end of Start Tag %s line %d\n",
8596 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008597
8598 /*
8599 * end of parsing of this node.
8600 */
8601 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008602 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008604 if (nsNr != ctxt->nsNr)
8605 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008606
8607 /*
8608 * Capture end position and add node
8609 */
8610 if ( ret != NULL && ctxt->record_info ) {
8611 node_info.end_pos = ctxt->input->consumed +
8612 (CUR_PTR - ctxt->input->base);
8613 node_info.end_line = ctxt->input->line;
8614 node_info.node = ret;
8615 xmlParserAddNodeInfo(ctxt, &node_info);
8616 }
8617 return;
8618 }
8619
8620 /*
8621 * Parse the content of the element:
8622 */
8623 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008624 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008625 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008626 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008627 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008628
8629 /*
8630 * end of parsing of this node.
8631 */
8632 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008633 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008634 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 if (nsNr != ctxt->nsNr)
8636 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008637 return;
8638 }
8639
8640 /*
8641 * parse the end of tag: '</' should be here.
8642 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008643 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008644 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008645 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008646 }
8647#ifdef LIBXML_SAX1_ENABLED
8648 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008650#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008651
8652 /*
8653 * Capture end position and add node
8654 */
8655 if ( ret != NULL && ctxt->record_info ) {
8656 node_info.end_pos = ctxt->input->consumed +
8657 (CUR_PTR - ctxt->input->base);
8658 node_info.end_line = ctxt->input->line;
8659 node_info.node = ret;
8660 xmlParserAddNodeInfo(ctxt, &node_info);
8661 }
8662}
8663
8664/**
8665 * xmlParseVersionNum:
8666 * @ctxt: an XML parser context
8667 *
8668 * parse the XML version value.
8669 *
8670 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8671 *
8672 * Returns the string giving the XML version number, or NULL
8673 */
8674xmlChar *
8675xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8676 xmlChar *buf = NULL;
8677 int len = 0;
8678 int size = 10;
8679 xmlChar cur;
8680
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008681 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008682 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008683 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008684 return(NULL);
8685 }
8686 cur = CUR;
8687 while (((cur >= 'a') && (cur <= 'z')) ||
8688 ((cur >= 'A') && (cur <= 'Z')) ||
8689 ((cur >= '0') && (cur <= '9')) ||
8690 (cur == '_') || (cur == '.') ||
8691 (cur == ':') || (cur == '-')) {
8692 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008693 xmlChar *tmp;
8694
Owen Taylor3473f882001-02-23 17:55:21 +00008695 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008696 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8697 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008699 return(NULL);
8700 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008701 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008702 }
8703 buf[len++] = cur;
8704 NEXT;
8705 cur=CUR;
8706 }
8707 buf[len] = 0;
8708 return(buf);
8709}
8710
8711/**
8712 * xmlParseVersionInfo:
8713 * @ctxt: an XML parser context
8714 *
8715 * parse the XML version.
8716 *
8717 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8718 *
8719 * [25] Eq ::= S? '=' S?
8720 *
8721 * Returns the version string, e.g. "1.0"
8722 */
8723
8724xmlChar *
8725xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8726 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008727
Daniel Veillarda07050d2003-10-19 14:46:32 +00008728 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008729 SKIP(7);
8730 SKIP_BLANKS;
8731 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008732 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008733 return(NULL);
8734 }
8735 NEXT;
8736 SKIP_BLANKS;
8737 if (RAW == '"') {
8738 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008739 version = xmlParseVersionNum(ctxt);
8740 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008741 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008742 } else
8743 NEXT;
8744 } else if (RAW == '\''){
8745 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008746 version = xmlParseVersionNum(ctxt);
8747 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008748 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008749 } else
8750 NEXT;
8751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008752 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008753 }
8754 }
8755 return(version);
8756}
8757
8758/**
8759 * xmlParseEncName:
8760 * @ctxt: an XML parser context
8761 *
8762 * parse the XML encoding name
8763 *
8764 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8765 *
8766 * Returns the encoding name value or NULL
8767 */
8768xmlChar *
8769xmlParseEncName(xmlParserCtxtPtr ctxt) {
8770 xmlChar *buf = NULL;
8771 int len = 0;
8772 int size = 10;
8773 xmlChar cur;
8774
8775 cur = CUR;
8776 if (((cur >= 'a') && (cur <= 'z')) ||
8777 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008778 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008779 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008780 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008781 return(NULL);
8782 }
8783
8784 buf[len++] = cur;
8785 NEXT;
8786 cur = CUR;
8787 while (((cur >= 'a') && (cur <= 'z')) ||
8788 ((cur >= 'A') && (cur <= 'Z')) ||
8789 ((cur >= '0') && (cur <= '9')) ||
8790 (cur == '.') || (cur == '_') ||
8791 (cur == '-')) {
8792 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008793 xmlChar *tmp;
8794
Owen Taylor3473f882001-02-23 17:55:21 +00008795 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008796 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8797 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008798 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008799 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008800 return(NULL);
8801 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008802 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008803 }
8804 buf[len++] = cur;
8805 NEXT;
8806 cur = CUR;
8807 if (cur == 0) {
8808 SHRINK;
8809 GROW;
8810 cur = CUR;
8811 }
8812 }
8813 buf[len] = 0;
8814 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008815 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008816 }
8817 return(buf);
8818}
8819
8820/**
8821 * xmlParseEncodingDecl:
8822 * @ctxt: an XML parser context
8823 *
8824 * parse the XML encoding declaration
8825 *
8826 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8827 *
8828 * this setups the conversion filters.
8829 *
8830 * Returns the encoding value or NULL
8831 */
8832
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008833const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008834xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8835 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008836
8837 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008838 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008839 SKIP(8);
8840 SKIP_BLANKS;
8841 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008842 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008843 return(NULL);
8844 }
8845 NEXT;
8846 SKIP_BLANKS;
8847 if (RAW == '"') {
8848 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008849 encoding = xmlParseEncName(ctxt);
8850 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008851 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008852 } else
8853 NEXT;
8854 } else if (RAW == '\''){
8855 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008856 encoding = xmlParseEncName(ctxt);
8857 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008858 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008859 } else
8860 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008861 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008862 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008863 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008864 /*
8865 * UTF-16 encoding stwich has already taken place at this stage,
8866 * more over the little-endian/big-endian selection is already done
8867 */
8868 if ((encoding != NULL) &&
8869 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8870 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008871 if (ctxt->encoding != NULL)
8872 xmlFree((xmlChar *) ctxt->encoding);
8873 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008874 }
8875 /*
8876 * UTF-8 encoding is handled natively
8877 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008878 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008879 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8880 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008881 if (ctxt->encoding != NULL)
8882 xmlFree((xmlChar *) ctxt->encoding);
8883 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008884 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008885 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008886 xmlCharEncodingHandlerPtr handler;
8887
8888 if (ctxt->input->encoding != NULL)
8889 xmlFree((xmlChar *) ctxt->input->encoding);
8890 ctxt->input->encoding = encoding;
8891
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008892 handler = xmlFindCharEncodingHandler((const char *) encoding);
8893 if (handler != NULL) {
8894 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008895 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008897 "Unsupported encoding %s\n", encoding);
8898 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008899 }
8900 }
8901 }
8902 return(encoding);
8903}
8904
8905/**
8906 * xmlParseSDDecl:
8907 * @ctxt: an XML parser context
8908 *
8909 * parse the XML standalone declaration
8910 *
8911 * [32] SDDecl ::= S 'standalone' Eq
8912 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8913 *
8914 * [ VC: Standalone Document Declaration ]
8915 * TODO The standalone document declaration must have the value "no"
8916 * if any external markup declarations contain declarations of:
8917 * - attributes with default values, if elements to which these
8918 * attributes apply appear in the document without specifications
8919 * of values for these attributes, or
8920 * - entities (other than amp, lt, gt, apos, quot), if references
8921 * to those entities appear in the document, or
8922 * - attributes with values subject to normalization, where the
8923 * attribute appears in the document with a value which will change
8924 * as a result of normalization, or
8925 * - element types with element content, if white space occurs directly
8926 * within any instance of those types.
8927 *
8928 * Returns 1 if standalone, 0 otherwise
8929 */
8930
8931int
8932xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8933 int standalone = -1;
8934
8935 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008936 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008937 SKIP(10);
8938 SKIP_BLANKS;
8939 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008940 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008941 return(standalone);
8942 }
8943 NEXT;
8944 SKIP_BLANKS;
8945 if (RAW == '\''){
8946 NEXT;
8947 if ((RAW == 'n') && (NXT(1) == 'o')) {
8948 standalone = 0;
8949 SKIP(2);
8950 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8951 (NXT(2) == 's')) {
8952 standalone = 1;
8953 SKIP(3);
8954 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008955 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008956 }
8957 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008958 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008959 } else
8960 NEXT;
8961 } else if (RAW == '"'){
8962 NEXT;
8963 if ((RAW == 'n') && (NXT(1) == 'o')) {
8964 standalone = 0;
8965 SKIP(2);
8966 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8967 (NXT(2) == 's')) {
8968 standalone = 1;
8969 SKIP(3);
8970 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008971 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008972 }
8973 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008974 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008975 } else
8976 NEXT;
8977 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008978 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008979 }
8980 }
8981 return(standalone);
8982}
8983
8984/**
8985 * xmlParseXMLDecl:
8986 * @ctxt: an XML parser context
8987 *
8988 * parse an XML declaration header
8989 *
8990 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8991 */
8992
8993void
8994xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8995 xmlChar *version;
8996
8997 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008998 * This value for standalone indicates that the document has an
8999 * XML declaration but it does not have a standalone attribute.
9000 * It will be overwritten later if a standalone attribute is found.
9001 */
9002 ctxt->input->standalone = -2;
9003
9004 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009005 * We know that '<?xml' is here.
9006 */
9007 SKIP(5);
9008
William M. Brack76e95df2003-10-18 16:20:14 +00009009 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9011 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009012 }
9013 SKIP_BLANKS;
9014
9015 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009016 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009017 */
9018 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009019 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009020 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009021 } else {
9022 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9023 /*
9024 * TODO: Blueberry should be detected here
9025 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009026 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9027 "Unsupported version '%s'\n",
9028 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009029 }
9030 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009031 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009032 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009033 }
Owen Taylor3473f882001-02-23 17:55:21 +00009034
9035 /*
9036 * We may have the encoding declaration
9037 */
William M. Brack76e95df2003-10-18 16:20:14 +00009038 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009039 if ((RAW == '?') && (NXT(1) == '>')) {
9040 SKIP(2);
9041 return;
9042 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009044 }
9045 xmlParseEncodingDecl(ctxt);
9046 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9047 /*
9048 * The XML REC instructs us to stop parsing right here
9049 */
9050 return;
9051 }
9052
9053 /*
9054 * We may have the standalone status.
9055 */
William M. Brack76e95df2003-10-18 16:20:14 +00009056 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009057 if ((RAW == '?') && (NXT(1) == '>')) {
9058 SKIP(2);
9059 return;
9060 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009062 }
9063 SKIP_BLANKS;
9064 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9065
9066 SKIP_BLANKS;
9067 if ((RAW == '?') && (NXT(1) == '>')) {
9068 SKIP(2);
9069 } else if (RAW == '>') {
9070 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009071 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009072 NEXT;
9073 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009074 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009075 MOVETO_ENDTAG(CUR_PTR);
9076 NEXT;
9077 }
9078}
9079
9080/**
9081 * xmlParseMisc:
9082 * @ctxt: an XML parser context
9083 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009084 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009085 *
9086 * [27] Misc ::= Comment | PI | S
9087 */
9088
9089void
9090xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009091 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009092 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009093 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009094 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009095 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009096 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009097 NEXT;
9098 } else
9099 xmlParseComment(ctxt);
9100 }
9101}
9102
9103/**
9104 * xmlParseDocument:
9105 * @ctxt: an XML parser context
9106 *
9107 * parse an XML document (and build a tree if using the standard SAX
9108 * interface).
9109 *
9110 * [1] document ::= prolog element Misc*
9111 *
9112 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9113 *
9114 * Returns 0, -1 in case of error. the parser context is augmented
9115 * as a result of the parsing.
9116 */
9117
9118int
9119xmlParseDocument(xmlParserCtxtPtr ctxt) {
9120 xmlChar start[4];
9121 xmlCharEncoding enc;
9122
9123 xmlInitParser();
9124
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009125 if ((ctxt == NULL) || (ctxt->input == NULL))
9126 return(-1);
9127
Owen Taylor3473f882001-02-23 17:55:21 +00009128 GROW;
9129
9130 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009131 * SAX: detecting the level.
9132 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009133 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009134
9135 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009136 * SAX: beginning of the document processing.
9137 */
9138 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9139 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9140
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009141 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9142 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009143 /*
9144 * Get the 4 first bytes and decode the charset
9145 * if enc != XML_CHAR_ENCODING_NONE
9146 * plug some encoding conversion routines.
9147 */
9148 start[0] = RAW;
9149 start[1] = NXT(1);
9150 start[2] = NXT(2);
9151 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009152 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009153 if (enc != XML_CHAR_ENCODING_NONE) {
9154 xmlSwitchEncoding(ctxt, enc);
9155 }
Owen Taylor3473f882001-02-23 17:55:21 +00009156 }
9157
9158
9159 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009160 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009161 }
9162
9163 /*
9164 * Check for the XMLDecl in the Prolog.
9165 */
9166 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009167 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009168
9169 /*
9170 * Note that we will switch encoding on the fly.
9171 */
9172 xmlParseXMLDecl(ctxt);
9173 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9174 /*
9175 * The XML REC instructs us to stop parsing right here
9176 */
9177 return(-1);
9178 }
9179 ctxt->standalone = ctxt->input->standalone;
9180 SKIP_BLANKS;
9181 } else {
9182 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9183 }
9184 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9185 ctxt->sax->startDocument(ctxt->userData);
9186
9187 /*
9188 * The Misc part of the Prolog
9189 */
9190 GROW;
9191 xmlParseMisc(ctxt);
9192
9193 /*
9194 * Then possibly doc type declaration(s) and more Misc
9195 * (doctypedecl Misc*)?
9196 */
9197 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009198 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009199
9200 ctxt->inSubset = 1;
9201 xmlParseDocTypeDecl(ctxt);
9202 if (RAW == '[') {
9203 ctxt->instate = XML_PARSER_DTD;
9204 xmlParseInternalSubset(ctxt);
9205 }
9206
9207 /*
9208 * Create and update the external subset.
9209 */
9210 ctxt->inSubset = 2;
9211 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9212 (!ctxt->disableSAX))
9213 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9214 ctxt->extSubSystem, ctxt->extSubURI);
9215 ctxt->inSubset = 0;
9216
9217
9218 ctxt->instate = XML_PARSER_PROLOG;
9219 xmlParseMisc(ctxt);
9220 }
9221
9222 /*
9223 * Time to start parsing the tree itself
9224 */
9225 GROW;
9226 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009227 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9228 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009229 } else {
9230 ctxt->instate = XML_PARSER_CONTENT;
9231 xmlParseElement(ctxt);
9232 ctxt->instate = XML_PARSER_EPILOG;
9233
9234
9235 /*
9236 * The Misc part at the end
9237 */
9238 xmlParseMisc(ctxt);
9239
Daniel Veillard561b7f82002-03-20 21:55:57 +00009240 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009241 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009242 }
9243 ctxt->instate = XML_PARSER_EOF;
9244 }
9245
9246 /*
9247 * SAX: end of the document processing.
9248 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009249 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009250 ctxt->sax->endDocument(ctxt->userData);
9251
Daniel Veillard5997aca2002-03-18 18:36:20 +00009252 /*
9253 * Remove locally kept entity definitions if the tree was not built
9254 */
9255 if ((ctxt->myDoc != NULL) &&
9256 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9257 xmlFreeDoc(ctxt->myDoc);
9258 ctxt->myDoc = NULL;
9259 }
9260
Daniel Veillardc7612992002-02-17 22:47:37 +00009261 if (! ctxt->wellFormed) {
9262 ctxt->valid = 0;
9263 return(-1);
9264 }
Owen Taylor3473f882001-02-23 17:55:21 +00009265 return(0);
9266}
9267
9268/**
9269 * xmlParseExtParsedEnt:
9270 * @ctxt: an XML parser context
9271 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009272 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009273 * An external general parsed entity is well-formed if it matches the
9274 * production labeled extParsedEnt.
9275 *
9276 * [78] extParsedEnt ::= TextDecl? content
9277 *
9278 * Returns 0, -1 in case of error. the parser context is augmented
9279 * as a result of the parsing.
9280 */
9281
9282int
9283xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9284 xmlChar start[4];
9285 xmlCharEncoding enc;
9286
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009287 if ((ctxt == NULL) || (ctxt->input == NULL))
9288 return(-1);
9289
Owen Taylor3473f882001-02-23 17:55:21 +00009290 xmlDefaultSAXHandlerInit();
9291
Daniel Veillard309f81d2003-09-23 09:02:53 +00009292 xmlDetectSAX2(ctxt);
9293
Owen Taylor3473f882001-02-23 17:55:21 +00009294 GROW;
9295
9296 /*
9297 * SAX: beginning of the document processing.
9298 */
9299 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9300 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9301
9302 /*
9303 * Get the 4 first bytes and decode the charset
9304 * if enc != XML_CHAR_ENCODING_NONE
9305 * plug some encoding conversion routines.
9306 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009307 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9308 start[0] = RAW;
9309 start[1] = NXT(1);
9310 start[2] = NXT(2);
9311 start[3] = NXT(3);
9312 enc = xmlDetectCharEncoding(start, 4);
9313 if (enc != XML_CHAR_ENCODING_NONE) {
9314 xmlSwitchEncoding(ctxt, enc);
9315 }
Owen Taylor3473f882001-02-23 17:55:21 +00009316 }
9317
9318
9319 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009320 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009321 }
9322
9323 /*
9324 * Check for the XMLDecl in the Prolog.
9325 */
9326 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009327 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009328
9329 /*
9330 * Note that we will switch encoding on the fly.
9331 */
9332 xmlParseXMLDecl(ctxt);
9333 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9334 /*
9335 * The XML REC instructs us to stop parsing right here
9336 */
9337 return(-1);
9338 }
9339 SKIP_BLANKS;
9340 } else {
9341 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9342 }
9343 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9344 ctxt->sax->startDocument(ctxt->userData);
9345
9346 /*
9347 * Doing validity checking on chunk doesn't make sense
9348 */
9349 ctxt->instate = XML_PARSER_CONTENT;
9350 ctxt->validate = 0;
9351 ctxt->loadsubset = 0;
9352 ctxt->depth = 0;
9353
9354 xmlParseContent(ctxt);
9355
9356 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009357 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009358 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009359 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009360 }
9361
9362 /*
9363 * SAX: end of the document processing.
9364 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009365 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009366 ctxt->sax->endDocument(ctxt->userData);
9367
9368 if (! ctxt->wellFormed) return(-1);
9369 return(0);
9370}
9371
Daniel Veillard73b013f2003-09-30 12:36:01 +00009372#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009373/************************************************************************
9374 * *
9375 * Progressive parsing interfaces *
9376 * *
9377 ************************************************************************/
9378
9379/**
9380 * xmlParseLookupSequence:
9381 * @ctxt: an XML parser context
9382 * @first: the first char to lookup
9383 * @next: the next char to lookup or zero
9384 * @third: the next char to lookup or zero
9385 *
9386 * Try to find if a sequence (first, next, third) or just (first next) or
9387 * (first) is available in the input stream.
9388 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9389 * to avoid rescanning sequences of bytes, it DOES change the state of the
9390 * parser, do not use liberally.
9391 *
9392 * Returns the index to the current parsing point if the full sequence
9393 * is available, -1 otherwise.
9394 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009395static int
Owen Taylor3473f882001-02-23 17:55:21 +00009396xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9397 xmlChar next, xmlChar third) {
9398 int base, len;
9399 xmlParserInputPtr in;
9400 const xmlChar *buf;
9401
9402 in = ctxt->input;
9403 if (in == NULL) return(-1);
9404 base = in->cur - in->base;
9405 if (base < 0) return(-1);
9406 if (ctxt->checkIndex > base)
9407 base = ctxt->checkIndex;
9408 if (in->buf == NULL) {
9409 buf = in->base;
9410 len = in->length;
9411 } else {
9412 buf = in->buf->buffer->content;
9413 len = in->buf->buffer->use;
9414 }
9415 /* take into account the sequence length */
9416 if (third) len -= 2;
9417 else if (next) len --;
9418 for (;base < len;base++) {
9419 if (buf[base] == first) {
9420 if (third != 0) {
9421 if ((buf[base + 1] != next) ||
9422 (buf[base + 2] != third)) continue;
9423 } else if (next != 0) {
9424 if (buf[base + 1] != next) continue;
9425 }
9426 ctxt->checkIndex = 0;
9427#ifdef DEBUG_PUSH
9428 if (next == 0)
9429 xmlGenericError(xmlGenericErrorContext,
9430 "PP: lookup '%c' found at %d\n",
9431 first, base);
9432 else if (third == 0)
9433 xmlGenericError(xmlGenericErrorContext,
9434 "PP: lookup '%c%c' found at %d\n",
9435 first, next, base);
9436 else
9437 xmlGenericError(xmlGenericErrorContext,
9438 "PP: lookup '%c%c%c' found at %d\n",
9439 first, next, third, base);
9440#endif
9441 return(base - (in->cur - in->base));
9442 }
9443 }
9444 ctxt->checkIndex = base;
9445#ifdef DEBUG_PUSH
9446 if (next == 0)
9447 xmlGenericError(xmlGenericErrorContext,
9448 "PP: lookup '%c' failed\n", first);
9449 else if (third == 0)
9450 xmlGenericError(xmlGenericErrorContext,
9451 "PP: lookup '%c%c' failed\n", first, next);
9452 else
9453 xmlGenericError(xmlGenericErrorContext,
9454 "PP: lookup '%c%c%c' failed\n", first, next, third);
9455#endif
9456 return(-1);
9457}
9458
9459/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009460 * xmlParseGetLasts:
9461 * @ctxt: an XML parser context
9462 * @lastlt: pointer to store the last '<' from the input
9463 * @lastgt: pointer to store the last '>' from the input
9464 *
9465 * Lookup the last < and > in the current chunk
9466 */
9467static void
9468xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9469 const xmlChar **lastgt) {
9470 const xmlChar *tmp;
9471
9472 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9473 xmlGenericError(xmlGenericErrorContext,
9474 "Internal error: xmlParseGetLasts\n");
9475 return;
9476 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009477 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009478 tmp = ctxt->input->end;
9479 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009480 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009481 if (tmp < ctxt->input->base) {
9482 *lastlt = NULL;
9483 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009485 *lastlt = tmp;
9486 tmp++;
9487 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9488 if (*tmp == '\'') {
9489 tmp++;
9490 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9491 if (tmp < ctxt->input->end) tmp++;
9492 } else if (*tmp == '"') {
9493 tmp++;
9494 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9495 if (tmp < ctxt->input->end) tmp++;
9496 } else
9497 tmp++;
9498 }
9499 if (tmp < ctxt->input->end)
9500 *lastgt = tmp;
9501 else {
9502 tmp = *lastlt;
9503 tmp--;
9504 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9505 if (tmp >= ctxt->input->base)
9506 *lastgt = tmp;
9507 else
9508 *lastgt = NULL;
9509 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009510 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009511 } else {
9512 *lastlt = NULL;
9513 *lastgt = NULL;
9514 }
9515}
9516/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009517 * xmlCheckCdataPush:
9518 * @cur: pointer to the bock of characters
9519 * @len: length of the block in bytes
9520 *
9521 * Check that the block of characters is okay as SCdata content [20]
9522 *
9523 * Returns the number of bytes to pass if okay, a negative index where an
9524 * UTF-8 error occured otherwise
9525 */
9526static int
9527xmlCheckCdataPush(const xmlChar *utf, int len) {
9528 int ix;
9529 unsigned char c;
9530 int codepoint;
9531
9532 if ((utf == NULL) || (len <= 0))
9533 return(0);
9534
9535 for (ix = 0; ix < len;) { /* string is 0-terminated */
9536 c = utf[ix];
9537 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9538 if (c >= 0x20)
9539 ix++;
9540 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9541 ix++;
9542 else
9543 return(-ix);
9544 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9545 if (ix + 2 > len) return(ix);
9546 if ((utf[ix+1] & 0xc0 ) != 0x80)
9547 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009548 codepoint = (utf[ix] & 0x1f) << 6;
9549 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009550 if (!xmlIsCharQ(codepoint))
9551 return(-ix);
9552 ix += 2;
9553 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9554 if (ix + 3 > len) return(ix);
9555 if (((utf[ix+1] & 0xc0) != 0x80) ||
9556 ((utf[ix+2] & 0xc0) != 0x80))
9557 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009558 codepoint = (utf[ix] & 0xf) << 12;
9559 codepoint |= (utf[ix+1] & 0x3f) << 6;
9560 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009561 if (!xmlIsCharQ(codepoint))
9562 return(-ix);
9563 ix += 3;
9564 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9565 if (ix + 4 > len) return(ix);
9566 if (((utf[ix+1] & 0xc0) != 0x80) ||
9567 ((utf[ix+2] & 0xc0) != 0x80) ||
9568 ((utf[ix+3] & 0xc0) != 0x80))
9569 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009570 codepoint = (utf[ix] & 0x7) << 18;
9571 codepoint |= (utf[ix+1] & 0x3f) << 12;
9572 codepoint |= (utf[ix+2] & 0x3f) << 6;
9573 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009574 if (!xmlIsCharQ(codepoint))
9575 return(-ix);
9576 ix += 4;
9577 } else /* unknown encoding */
9578 return(-ix);
9579 }
9580 return(ix);
9581}
9582
9583/**
Owen Taylor3473f882001-02-23 17:55:21 +00009584 * xmlParseTryOrFinish:
9585 * @ctxt: an XML parser context
9586 * @terminate: last chunk indicator
9587 *
9588 * Try to progress on parsing
9589 *
9590 * Returns zero if no parsing was possible
9591 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009592static int
Owen Taylor3473f882001-02-23 17:55:21 +00009593xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9594 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009595 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009596 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009597 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009598
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009599 if (ctxt->input == NULL)
9600 return(0);
9601
Owen Taylor3473f882001-02-23 17:55:21 +00009602#ifdef DEBUG_PUSH
9603 switch (ctxt->instate) {
9604 case XML_PARSER_EOF:
9605 xmlGenericError(xmlGenericErrorContext,
9606 "PP: try EOF\n"); break;
9607 case XML_PARSER_START:
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: try START\n"); break;
9610 case XML_PARSER_MISC:
9611 xmlGenericError(xmlGenericErrorContext,
9612 "PP: try MISC\n");break;
9613 case XML_PARSER_COMMENT:
9614 xmlGenericError(xmlGenericErrorContext,
9615 "PP: try COMMENT\n");break;
9616 case XML_PARSER_PROLOG:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: try PROLOG\n");break;
9619 case XML_PARSER_START_TAG:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: try START_TAG\n");break;
9622 case XML_PARSER_CONTENT:
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: try CONTENT\n");break;
9625 case XML_PARSER_CDATA_SECTION:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: try CDATA_SECTION\n");break;
9628 case XML_PARSER_END_TAG:
9629 xmlGenericError(xmlGenericErrorContext,
9630 "PP: try END_TAG\n");break;
9631 case XML_PARSER_ENTITY_DECL:
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: try ENTITY_DECL\n");break;
9634 case XML_PARSER_ENTITY_VALUE:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: try ENTITY_VALUE\n");break;
9637 case XML_PARSER_ATTRIBUTE_VALUE:
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: try ATTRIBUTE_VALUE\n");break;
9640 case XML_PARSER_DTD:
9641 xmlGenericError(xmlGenericErrorContext,
9642 "PP: try DTD\n");break;
9643 case XML_PARSER_EPILOG:
9644 xmlGenericError(xmlGenericErrorContext,
9645 "PP: try EPILOG\n");break;
9646 case XML_PARSER_PI:
9647 xmlGenericError(xmlGenericErrorContext,
9648 "PP: try PI\n");break;
9649 case XML_PARSER_IGNORE:
9650 xmlGenericError(xmlGenericErrorContext,
9651 "PP: try IGNORE\n");break;
9652 }
9653#endif
9654
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009655 if ((ctxt->input != NULL) &&
9656 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009657 xmlSHRINK(ctxt);
9658 ctxt->checkIndex = 0;
9659 }
9660 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009661
Daniel Veillarda880b122003-04-21 21:36:41 +00009662 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009663 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009664 return(0);
9665
9666
Owen Taylor3473f882001-02-23 17:55:21 +00009667 /*
9668 * Pop-up of finished entities.
9669 */
9670 while ((RAW == 0) && (ctxt->inputNr > 1))
9671 xmlPopInput(ctxt);
9672
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009673 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009674 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009675 avail = ctxt->input->length -
9676 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009677 else {
9678 /*
9679 * If we are operating on converted input, try to flush
9680 * remaining chars to avoid them stalling in the non-converted
9681 * buffer.
9682 */
9683 if ((ctxt->input->buf->raw != NULL) &&
9684 (ctxt->input->buf->raw->use > 0)) {
9685 int base = ctxt->input->base -
9686 ctxt->input->buf->buffer->content;
9687 int current = ctxt->input->cur - ctxt->input->base;
9688
9689 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9690 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9691 ctxt->input->cur = ctxt->input->base + current;
9692 ctxt->input->end =
9693 &ctxt->input->buf->buffer->content[
9694 ctxt->input->buf->buffer->use];
9695 }
9696 avail = ctxt->input->buf->buffer->use -
9697 (ctxt->input->cur - ctxt->input->base);
9698 }
Owen Taylor3473f882001-02-23 17:55:21 +00009699 if (avail < 1)
9700 goto done;
9701 switch (ctxt->instate) {
9702 case XML_PARSER_EOF:
9703 /*
9704 * Document parsing is done !
9705 */
9706 goto done;
9707 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009708 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9709 xmlChar start[4];
9710 xmlCharEncoding enc;
9711
9712 /*
9713 * Very first chars read from the document flow.
9714 */
9715 if (avail < 4)
9716 goto done;
9717
9718 /*
9719 * Get the 4 first bytes and decode the charset
9720 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009721 * plug some encoding conversion routines,
9722 * else xmlSwitchEncoding will set to (default)
9723 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009724 */
9725 start[0] = RAW;
9726 start[1] = NXT(1);
9727 start[2] = NXT(2);
9728 start[3] = NXT(3);
9729 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009730 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009731 break;
9732 }
Owen Taylor3473f882001-02-23 17:55:21 +00009733
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009734 if (avail < 2)
9735 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009736 cur = ctxt->input->cur[0];
9737 next = ctxt->input->cur[1];
9738 if (cur == 0) {
9739 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9740 ctxt->sax->setDocumentLocator(ctxt->userData,
9741 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009742 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009743 ctxt->instate = XML_PARSER_EOF;
9744#ifdef DEBUG_PUSH
9745 xmlGenericError(xmlGenericErrorContext,
9746 "PP: entering EOF\n");
9747#endif
9748 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9749 ctxt->sax->endDocument(ctxt->userData);
9750 goto done;
9751 }
9752 if ((cur == '<') && (next == '?')) {
9753 /* PI or XML decl */
9754 if (avail < 5) return(ret);
9755 if ((!terminate) &&
9756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9757 return(ret);
9758 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9759 ctxt->sax->setDocumentLocator(ctxt->userData,
9760 &xmlDefaultSAXLocator);
9761 if ((ctxt->input->cur[2] == 'x') &&
9762 (ctxt->input->cur[3] == 'm') &&
9763 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009764 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009765 ret += 5;
9766#ifdef DEBUG_PUSH
9767 xmlGenericError(xmlGenericErrorContext,
9768 "PP: Parsing XML Decl\n");
9769#endif
9770 xmlParseXMLDecl(ctxt);
9771 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9772 /*
9773 * The XML REC instructs us to stop parsing right
9774 * here
9775 */
9776 ctxt->instate = XML_PARSER_EOF;
9777 return(0);
9778 }
9779 ctxt->standalone = ctxt->input->standalone;
9780 if ((ctxt->encoding == NULL) &&
9781 (ctxt->input->encoding != NULL))
9782 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9783 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9784 (!ctxt->disableSAX))
9785 ctxt->sax->startDocument(ctxt->userData);
9786 ctxt->instate = XML_PARSER_MISC;
9787#ifdef DEBUG_PUSH
9788 xmlGenericError(xmlGenericErrorContext,
9789 "PP: entering MISC\n");
9790#endif
9791 } else {
9792 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9793 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9794 (!ctxt->disableSAX))
9795 ctxt->sax->startDocument(ctxt->userData);
9796 ctxt->instate = XML_PARSER_MISC;
9797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: entering MISC\n");
9800#endif
9801 }
9802 } else {
9803 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9804 ctxt->sax->setDocumentLocator(ctxt->userData,
9805 &xmlDefaultSAXLocator);
9806 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009807 if (ctxt->version == NULL) {
9808 xmlErrMemory(ctxt, NULL);
9809 break;
9810 }
Owen Taylor3473f882001-02-23 17:55:21 +00009811 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9812 (!ctxt->disableSAX))
9813 ctxt->sax->startDocument(ctxt->userData);
9814 ctxt->instate = XML_PARSER_MISC;
9815#ifdef DEBUG_PUSH
9816 xmlGenericError(xmlGenericErrorContext,
9817 "PP: entering MISC\n");
9818#endif
9819 }
9820 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009821 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009822 const xmlChar *name;
9823 const xmlChar *prefix;
9824 const xmlChar *URI;
9825 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009826
9827 if ((avail < 2) && (ctxt->inputNr == 1))
9828 goto done;
9829 cur = ctxt->input->cur[0];
9830 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009831 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009832 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009833 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9834 ctxt->sax->endDocument(ctxt->userData);
9835 goto done;
9836 }
9837 if (!terminate) {
9838 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009839 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009840 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009841 goto done;
9842 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9843 goto done;
9844 }
9845 }
9846 if (ctxt->spaceNr == 0)
9847 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009848 else if (*ctxt->space == -2)
9849 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009850 else
9851 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009852#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009853 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009854#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009855 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009856#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009857 else
9858 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009859#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009860 if (name == NULL) {
9861 spacePop(ctxt);
9862 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009863 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9864 ctxt->sax->endDocument(ctxt->userData);
9865 goto done;
9866 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009867#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009868 /*
9869 * [ VC: Root Element Type ]
9870 * The Name in the document type declaration must match
9871 * the element type of the root element.
9872 */
9873 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9874 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9875 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009876#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009877
9878 /*
9879 * Check for an Empty Element.
9880 */
9881 if ((RAW == '/') && (NXT(1) == '>')) {
9882 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009883
9884 if (ctxt->sax2) {
9885 if ((ctxt->sax != NULL) &&
9886 (ctxt->sax->endElementNs != NULL) &&
9887 (!ctxt->disableSAX))
9888 ctxt->sax->endElementNs(ctxt->userData, name,
9889 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009890 if (ctxt->nsNr - nsNr > 0)
9891 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009892#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009893 } else {
9894 if ((ctxt->sax != NULL) &&
9895 (ctxt->sax->endElement != NULL) &&
9896 (!ctxt->disableSAX))
9897 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009898#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009899 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009900 spacePop(ctxt);
9901 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009902 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009903 } else {
9904 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009905 }
9906 break;
9907 }
9908 if (RAW == '>') {
9909 NEXT;
9910 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009911 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009912 "Couldn't find end of Start Tag %s\n",
9913 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009914 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009915 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009916 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009917 if (ctxt->sax2)
9918 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009919#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009920 else
9921 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009922#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009923
Daniel Veillarda880b122003-04-21 21:36:41 +00009924 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009925 break;
9926 }
9927 case XML_PARSER_CONTENT: {
9928 const xmlChar *test;
9929 unsigned int cons;
9930 if ((avail < 2) && (ctxt->inputNr == 1))
9931 goto done;
9932 cur = ctxt->input->cur[0];
9933 next = ctxt->input->cur[1];
9934
9935 test = CUR_PTR;
9936 cons = ctxt->input->consumed;
9937 if ((cur == '<') && (next == '/')) {
9938 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009939 break;
9940 } else if ((cur == '<') && (next == '?')) {
9941 if ((!terminate) &&
9942 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9943 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009944 xmlParsePI(ctxt);
9945 } else if ((cur == '<') && (next != '!')) {
9946 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009947 break;
9948 } else if ((cur == '<') && (next == '!') &&
9949 (ctxt->input->cur[2] == '-') &&
9950 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009951 int term;
9952
9953 if (avail < 4)
9954 goto done;
9955 ctxt->input->cur += 4;
9956 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9957 ctxt->input->cur -= 4;
9958 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009959 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009960 xmlParseComment(ctxt);
9961 ctxt->instate = XML_PARSER_CONTENT;
9962 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9963 (ctxt->input->cur[2] == '[') &&
9964 (ctxt->input->cur[3] == 'C') &&
9965 (ctxt->input->cur[4] == 'D') &&
9966 (ctxt->input->cur[5] == 'A') &&
9967 (ctxt->input->cur[6] == 'T') &&
9968 (ctxt->input->cur[7] == 'A') &&
9969 (ctxt->input->cur[8] == '[')) {
9970 SKIP(9);
9971 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009972 break;
9973 } else if ((cur == '<') && (next == '!') &&
9974 (avail < 9)) {
9975 goto done;
9976 } else if (cur == '&') {
9977 if ((!terminate) &&
9978 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9979 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009980 xmlParseReference(ctxt);
9981 } else {
9982 /* TODO Avoid the extra copy, handle directly !!! */
9983 /*
9984 * Goal of the following test is:
9985 * - minimize calls to the SAX 'character' callback
9986 * when they are mergeable
9987 * - handle a problem for isBlank when we only parse
9988 * a sequence of blank chars and the next one is
9989 * not available to check against '<' presence.
9990 * - tries to homogenize the differences in SAX
9991 * callbacks between the push and pull versions
9992 * of the parser.
9993 */
9994 if ((ctxt->inputNr == 1) &&
9995 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9996 if (!terminate) {
9997 if (ctxt->progressive) {
9998 if ((lastlt == NULL) ||
9999 (ctxt->input->cur > lastlt))
10000 goto done;
10001 } else if (xmlParseLookupSequence(ctxt,
10002 '<', 0, 0) < 0) {
10003 goto done;
10004 }
10005 }
10006 }
10007 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010008 xmlParseCharData(ctxt, 0);
10009 }
10010 /*
10011 * Pop-up of finished entities.
10012 */
10013 while ((RAW == 0) && (ctxt->inputNr > 1))
10014 xmlPopInput(ctxt);
10015 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010016 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10017 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010018 ctxt->instate = XML_PARSER_EOF;
10019 break;
10020 }
10021 break;
10022 }
10023 case XML_PARSER_END_TAG:
10024 if (avail < 2)
10025 goto done;
10026 if (!terminate) {
10027 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010028 /* > can be found unescaped in attribute values */
10029 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010030 goto done;
10031 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10032 goto done;
10033 }
10034 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010035 if (ctxt->sax2) {
10036 xmlParseEndTag2(ctxt,
10037 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10038 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010039 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010040 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010041 }
10042#ifdef LIBXML_SAX1_ENABLED
10043 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010044 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010045#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010046 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010047 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010048 } else {
10049 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010050 }
10051 break;
10052 case XML_PARSER_CDATA_SECTION: {
10053 /*
10054 * Push mode needs the SAX callback for cdataBlock to
10055 * merge contiguous callbacks back together.
10056 */
10057 int base;
10058
10059 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10060 if (base < 0) {
10061 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010062 int tmp;
10063
10064 tmp = xmlCheckCdataPush(ctxt->input->cur,
10065 XML_PARSER_BIG_BUFFER_SIZE);
10066 if (tmp < 0) {
10067 tmp = -tmp;
10068 ctxt->input->cur += tmp;
10069 goto encoding_error;
10070 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010071 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10072 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010073 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010074 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010075 else if (ctxt->sax->characters != NULL)
10076 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010077 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010078 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010079 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010080 ctxt->checkIndex = 0;
10081 }
10082 goto done;
10083 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010084 int tmp;
10085
10086 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10087 if ((tmp < 0) || (tmp != base)) {
10088 tmp = -tmp;
10089 ctxt->input->cur += tmp;
10090 goto encoding_error;
10091 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010092 if ((ctxt->sax != NULL) && (base > 0) &&
10093 (!ctxt->disableSAX)) {
10094 if (ctxt->sax->cdataBlock != NULL)
10095 ctxt->sax->cdataBlock(ctxt->userData,
10096 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010097 else if (ctxt->sax->characters != NULL)
10098 ctxt->sax->characters(ctxt->userData,
10099 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010100 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010101 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010102 ctxt->checkIndex = 0;
10103 ctxt->instate = XML_PARSER_CONTENT;
10104#ifdef DEBUG_PUSH
10105 xmlGenericError(xmlGenericErrorContext,
10106 "PP: entering CONTENT\n");
10107#endif
10108 }
10109 break;
10110 }
Owen Taylor3473f882001-02-23 17:55:21 +000010111 case XML_PARSER_MISC:
10112 SKIP_BLANKS;
10113 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010114 avail = ctxt->input->length -
10115 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010116 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010117 avail = ctxt->input->buf->buffer->use -
10118 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010119 if (avail < 2)
10120 goto done;
10121 cur = ctxt->input->cur[0];
10122 next = ctxt->input->cur[1];
10123 if ((cur == '<') && (next == '?')) {
10124 if ((!terminate) &&
10125 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10126 goto done;
10127#ifdef DEBUG_PUSH
10128 xmlGenericError(xmlGenericErrorContext,
10129 "PP: Parsing PI\n");
10130#endif
10131 xmlParsePI(ctxt);
10132 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010133 (ctxt->input->cur[2] == '-') &&
10134 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010135 if ((!terminate) &&
10136 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10137 goto done;
10138#ifdef DEBUG_PUSH
10139 xmlGenericError(xmlGenericErrorContext,
10140 "PP: Parsing Comment\n");
10141#endif
10142 xmlParseComment(ctxt);
10143 ctxt->instate = XML_PARSER_MISC;
10144 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010145 (ctxt->input->cur[2] == 'D') &&
10146 (ctxt->input->cur[3] == 'O') &&
10147 (ctxt->input->cur[4] == 'C') &&
10148 (ctxt->input->cur[5] == 'T') &&
10149 (ctxt->input->cur[6] == 'Y') &&
10150 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010151 (ctxt->input->cur[8] == 'E')) {
10152 if ((!terminate) &&
10153 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10154 goto done;
10155#ifdef DEBUG_PUSH
10156 xmlGenericError(xmlGenericErrorContext,
10157 "PP: Parsing internal subset\n");
10158#endif
10159 ctxt->inSubset = 1;
10160 xmlParseDocTypeDecl(ctxt);
10161 if (RAW == '[') {
10162 ctxt->instate = XML_PARSER_DTD;
10163#ifdef DEBUG_PUSH
10164 xmlGenericError(xmlGenericErrorContext,
10165 "PP: entering DTD\n");
10166#endif
10167 } else {
10168 /*
10169 * Create and update the external subset.
10170 */
10171 ctxt->inSubset = 2;
10172 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10173 (ctxt->sax->externalSubset != NULL))
10174 ctxt->sax->externalSubset(ctxt->userData,
10175 ctxt->intSubName, ctxt->extSubSystem,
10176 ctxt->extSubURI);
10177 ctxt->inSubset = 0;
10178 ctxt->instate = XML_PARSER_PROLOG;
10179#ifdef DEBUG_PUSH
10180 xmlGenericError(xmlGenericErrorContext,
10181 "PP: entering PROLOG\n");
10182#endif
10183 }
10184 } else if ((cur == '<') && (next == '!') &&
10185 (avail < 9)) {
10186 goto done;
10187 } else {
10188 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010189 ctxt->progressive = 1;
10190 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010191#ifdef DEBUG_PUSH
10192 xmlGenericError(xmlGenericErrorContext,
10193 "PP: entering START_TAG\n");
10194#endif
10195 }
10196 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010197 case XML_PARSER_PROLOG:
10198 SKIP_BLANKS;
10199 if (ctxt->input->buf == NULL)
10200 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10201 else
10202 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10203 if (avail < 2)
10204 goto done;
10205 cur = ctxt->input->cur[0];
10206 next = ctxt->input->cur[1];
10207 if ((cur == '<') && (next == '?')) {
10208 if ((!terminate) &&
10209 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10210 goto done;
10211#ifdef DEBUG_PUSH
10212 xmlGenericError(xmlGenericErrorContext,
10213 "PP: Parsing PI\n");
10214#endif
10215 xmlParsePI(ctxt);
10216 } else if ((cur == '<') && (next == '!') &&
10217 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10218 if ((!terminate) &&
10219 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10220 goto done;
10221#ifdef DEBUG_PUSH
10222 xmlGenericError(xmlGenericErrorContext,
10223 "PP: Parsing Comment\n");
10224#endif
10225 xmlParseComment(ctxt);
10226 ctxt->instate = XML_PARSER_PROLOG;
10227 } else if ((cur == '<') && (next == '!') &&
10228 (avail < 4)) {
10229 goto done;
10230 } else {
10231 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010232 if (ctxt->progressive == 0)
10233 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010234 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010235#ifdef DEBUG_PUSH
10236 xmlGenericError(xmlGenericErrorContext,
10237 "PP: entering START_TAG\n");
10238#endif
10239 }
10240 break;
10241 case XML_PARSER_EPILOG:
10242 SKIP_BLANKS;
10243 if (ctxt->input->buf == NULL)
10244 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10245 else
10246 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10247 if (avail < 2)
10248 goto done;
10249 cur = ctxt->input->cur[0];
10250 next = ctxt->input->cur[1];
10251 if ((cur == '<') && (next == '?')) {
10252 if ((!terminate) &&
10253 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10254 goto done;
10255#ifdef DEBUG_PUSH
10256 xmlGenericError(xmlGenericErrorContext,
10257 "PP: Parsing PI\n");
10258#endif
10259 xmlParsePI(ctxt);
10260 ctxt->instate = XML_PARSER_EPILOG;
10261 } else if ((cur == '<') && (next == '!') &&
10262 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10263 if ((!terminate) &&
10264 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10265 goto done;
10266#ifdef DEBUG_PUSH
10267 xmlGenericError(xmlGenericErrorContext,
10268 "PP: Parsing Comment\n");
10269#endif
10270 xmlParseComment(ctxt);
10271 ctxt->instate = XML_PARSER_EPILOG;
10272 } else if ((cur == '<') && (next == '!') &&
10273 (avail < 4)) {
10274 goto done;
10275 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010276 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010277 ctxt->instate = XML_PARSER_EOF;
10278#ifdef DEBUG_PUSH
10279 xmlGenericError(xmlGenericErrorContext,
10280 "PP: entering EOF\n");
10281#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010282 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010283 ctxt->sax->endDocument(ctxt->userData);
10284 goto done;
10285 }
10286 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010287 case XML_PARSER_DTD: {
10288 /*
10289 * Sorry but progressive parsing of the internal subset
10290 * is not expected to be supported. We first check that
10291 * the full content of the internal subset is available and
10292 * the parsing is launched only at that point.
10293 * Internal subset ends up with "']' S? '>'" in an unescaped
10294 * section and not in a ']]>' sequence which are conditional
10295 * sections (whoever argued to keep that crap in XML deserve
10296 * a place in hell !).
10297 */
10298 int base, i;
10299 xmlChar *buf;
10300 xmlChar quote = 0;
10301
10302 base = ctxt->input->cur - ctxt->input->base;
10303 if (base < 0) return(0);
10304 if (ctxt->checkIndex > base)
10305 base = ctxt->checkIndex;
10306 buf = ctxt->input->buf->buffer->content;
10307 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10308 base++) {
10309 if (quote != 0) {
10310 if (buf[base] == quote)
10311 quote = 0;
10312 continue;
10313 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010314 if ((quote == 0) && (buf[base] == '<')) {
10315 int found = 0;
10316 /* special handling of comments */
10317 if (((unsigned int) base + 4 <
10318 ctxt->input->buf->buffer->use) &&
10319 (buf[base + 1] == '!') &&
10320 (buf[base + 2] == '-') &&
10321 (buf[base + 3] == '-')) {
10322 for (;(unsigned int) base + 3 <
10323 ctxt->input->buf->buffer->use; base++) {
10324 if ((buf[base] == '-') &&
10325 (buf[base + 1] == '-') &&
10326 (buf[base + 2] == '>')) {
10327 found = 1;
10328 base += 2;
10329 break;
10330 }
10331 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010332 if (!found) {
10333#if 0
10334 fprintf(stderr, "unfinished comment\n");
10335#endif
10336 break; /* for */
10337 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010338 continue;
10339 }
10340 }
Owen Taylor3473f882001-02-23 17:55:21 +000010341 if (buf[base] == '"') {
10342 quote = '"';
10343 continue;
10344 }
10345 if (buf[base] == '\'') {
10346 quote = '\'';
10347 continue;
10348 }
10349 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010350#if 0
10351 fprintf(stderr, "%c%c%c%c: ", buf[base],
10352 buf[base + 1], buf[base + 2], buf[base + 3]);
10353#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010354 if ((unsigned int) base +1 >=
10355 ctxt->input->buf->buffer->use)
10356 break;
10357 if (buf[base + 1] == ']') {
10358 /* conditional crap, skip both ']' ! */
10359 base++;
10360 continue;
10361 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010362 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010363 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10364 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010365 if (buf[base + i] == '>') {
10366#if 0
10367 fprintf(stderr, "found\n");
10368#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010369 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010370 }
10371 if (!IS_BLANK_CH(buf[base + i])) {
10372#if 0
10373 fprintf(stderr, "not found\n");
10374#endif
10375 goto not_end_of_int_subset;
10376 }
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010378#if 0
10379 fprintf(stderr, "end of stream\n");
10380#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010381 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010382
Owen Taylor3473f882001-02-23 17:55:21 +000010383 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010384not_end_of_int_subset:
10385 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010386 }
10387 /*
10388 * We didn't find the end of the Internal subset
10389 */
Owen Taylor3473f882001-02-23 17:55:21 +000010390#ifdef DEBUG_PUSH
10391 if (next == 0)
10392 xmlGenericError(xmlGenericErrorContext,
10393 "PP: lookup of int subset end filed\n");
10394#endif
10395 goto done;
10396
10397found_end_int_subset:
10398 xmlParseInternalSubset(ctxt);
10399 ctxt->inSubset = 2;
10400 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10401 (ctxt->sax->externalSubset != NULL))
10402 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10403 ctxt->extSubSystem, ctxt->extSubURI);
10404 ctxt->inSubset = 0;
10405 ctxt->instate = XML_PARSER_PROLOG;
10406 ctxt->checkIndex = 0;
10407#ifdef DEBUG_PUSH
10408 xmlGenericError(xmlGenericErrorContext,
10409 "PP: entering PROLOG\n");
10410#endif
10411 break;
10412 }
10413 case XML_PARSER_COMMENT:
10414 xmlGenericError(xmlGenericErrorContext,
10415 "PP: internal error, state == COMMENT\n");
10416 ctxt->instate = XML_PARSER_CONTENT;
10417#ifdef DEBUG_PUSH
10418 xmlGenericError(xmlGenericErrorContext,
10419 "PP: entering CONTENT\n");
10420#endif
10421 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010422 case XML_PARSER_IGNORE:
10423 xmlGenericError(xmlGenericErrorContext,
10424 "PP: internal error, state == IGNORE");
10425 ctxt->instate = XML_PARSER_DTD;
10426#ifdef DEBUG_PUSH
10427 xmlGenericError(xmlGenericErrorContext,
10428 "PP: entering DTD\n");
10429#endif
10430 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010431 case XML_PARSER_PI:
10432 xmlGenericError(xmlGenericErrorContext,
10433 "PP: internal error, state == PI\n");
10434 ctxt->instate = XML_PARSER_CONTENT;
10435#ifdef DEBUG_PUSH
10436 xmlGenericError(xmlGenericErrorContext,
10437 "PP: entering CONTENT\n");
10438#endif
10439 break;
10440 case XML_PARSER_ENTITY_DECL:
10441 xmlGenericError(xmlGenericErrorContext,
10442 "PP: internal error, state == ENTITY_DECL\n");
10443 ctxt->instate = XML_PARSER_DTD;
10444#ifdef DEBUG_PUSH
10445 xmlGenericError(xmlGenericErrorContext,
10446 "PP: entering DTD\n");
10447#endif
10448 break;
10449 case XML_PARSER_ENTITY_VALUE:
10450 xmlGenericError(xmlGenericErrorContext,
10451 "PP: internal error, state == ENTITY_VALUE\n");
10452 ctxt->instate = XML_PARSER_CONTENT;
10453#ifdef DEBUG_PUSH
10454 xmlGenericError(xmlGenericErrorContext,
10455 "PP: entering DTD\n");
10456#endif
10457 break;
10458 case XML_PARSER_ATTRIBUTE_VALUE:
10459 xmlGenericError(xmlGenericErrorContext,
10460 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10461 ctxt->instate = XML_PARSER_START_TAG;
10462#ifdef DEBUG_PUSH
10463 xmlGenericError(xmlGenericErrorContext,
10464 "PP: entering START_TAG\n");
10465#endif
10466 break;
10467 case XML_PARSER_SYSTEM_LITERAL:
10468 xmlGenericError(xmlGenericErrorContext,
10469 "PP: internal error, state == SYSTEM_LITERAL\n");
10470 ctxt->instate = XML_PARSER_START_TAG;
10471#ifdef DEBUG_PUSH
10472 xmlGenericError(xmlGenericErrorContext,
10473 "PP: entering START_TAG\n");
10474#endif
10475 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010476 case XML_PARSER_PUBLIC_LITERAL:
10477 xmlGenericError(xmlGenericErrorContext,
10478 "PP: internal error, state == PUBLIC_LITERAL\n");
10479 ctxt->instate = XML_PARSER_START_TAG;
10480#ifdef DEBUG_PUSH
10481 xmlGenericError(xmlGenericErrorContext,
10482 "PP: entering START_TAG\n");
10483#endif
10484 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010485 }
10486 }
10487done:
10488#ifdef DEBUG_PUSH
10489 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10490#endif
10491 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010492encoding_error:
10493 {
10494 char buffer[150];
10495
10496 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10497 ctxt->input->cur[0], ctxt->input->cur[1],
10498 ctxt->input->cur[2], ctxt->input->cur[3]);
10499 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10500 "Input is not proper UTF-8, indicate encoding !\n%s",
10501 BAD_CAST buffer, NULL);
10502 }
10503 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010504}
10505
10506/**
Owen Taylor3473f882001-02-23 17:55:21 +000010507 * xmlParseChunk:
10508 * @ctxt: an XML parser context
10509 * @chunk: an char array
10510 * @size: the size in byte of the chunk
10511 * @terminate: last chunk indicator
10512 *
10513 * Parse a Chunk of memory
10514 *
10515 * Returns zero if no error, the xmlParserErrors otherwise.
10516 */
10517int
10518xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10519 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010520 int end_in_lf = 0;
10521
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010522 if (ctxt == NULL)
10523 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010524 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010525 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010526 if (ctxt->instate == XML_PARSER_START)
10527 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010528 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10529 (chunk[size - 1] == '\r')) {
10530 end_in_lf = 1;
10531 size--;
10532 }
Owen Taylor3473f882001-02-23 17:55:21 +000010533 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10534 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10535 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10536 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010537 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010538
William M. Bracka3215c72004-07-31 16:24:01 +000010539 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10540 if (res < 0) {
10541 ctxt->errNo = XML_PARSER_EOF;
10542 ctxt->disableSAX = 1;
10543 return (XML_PARSER_EOF);
10544 }
Owen Taylor3473f882001-02-23 17:55:21 +000010545 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10546 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010547 ctxt->input->end =
10548 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010549#ifdef DEBUG_PUSH
10550 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10551#endif
10552
Owen Taylor3473f882001-02-23 17:55:21 +000010553 } else if (ctxt->instate != XML_PARSER_EOF) {
10554 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10555 xmlParserInputBufferPtr in = ctxt->input->buf;
10556 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10557 (in->raw != NULL)) {
10558 int nbchars;
10559
10560 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10561 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010562 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010563 xmlGenericError(xmlGenericErrorContext,
10564 "xmlParseChunk: encoder error\n");
10565 return(XML_ERR_INVALID_ENCODING);
10566 }
10567 }
10568 }
10569 }
10570 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010571 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10572 (ctxt->input->buf != NULL)) {
10573 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10574 }
Daniel Veillard14412512005-01-21 23:53:26 +000010575 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010576 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010577 if (terminate) {
10578 /*
10579 * Check for termination
10580 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010581 int avail = 0;
10582
10583 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010584 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010585 avail = ctxt->input->length -
10586 (ctxt->input->cur - ctxt->input->base);
10587 else
10588 avail = ctxt->input->buf->buffer->use -
10589 (ctxt->input->cur - ctxt->input->base);
10590 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010591
Owen Taylor3473f882001-02-23 17:55:21 +000010592 if ((ctxt->instate != XML_PARSER_EOF) &&
10593 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010594 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010595 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010596 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010597 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010598 }
Owen Taylor3473f882001-02-23 17:55:21 +000010599 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010600 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010601 ctxt->sax->endDocument(ctxt->userData);
10602 }
10603 ctxt->instate = XML_PARSER_EOF;
10604 }
10605 return((xmlParserErrors) ctxt->errNo);
10606}
10607
10608/************************************************************************
10609 * *
10610 * I/O front end functions to the parser *
10611 * *
10612 ************************************************************************/
10613
10614/**
Owen Taylor3473f882001-02-23 17:55:21 +000010615 * xmlCreatePushParserCtxt:
10616 * @sax: a SAX handler
10617 * @user_data: The user data returned on SAX callbacks
10618 * @chunk: a pointer to an array of chars
10619 * @size: number of chars in the array
10620 * @filename: an optional file name or URI
10621 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010622 * Create a parser context for using the XML parser in push mode.
10623 * If @buffer and @size are non-NULL, the data is used to detect
10624 * the encoding. The remaining characters will be parsed so they
10625 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010626 * To allow content encoding detection, @size should be >= 4
10627 * The value of @filename is used for fetching external entities
10628 * and error/warning reports.
10629 *
10630 * Returns the new parser context or NULL
10631 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010632
Owen Taylor3473f882001-02-23 17:55:21 +000010633xmlParserCtxtPtr
10634xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10635 const char *chunk, int size, const char *filename) {
10636 xmlParserCtxtPtr ctxt;
10637 xmlParserInputPtr inputStream;
10638 xmlParserInputBufferPtr buf;
10639 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10640
10641 /*
10642 * plug some encoding conversion routines
10643 */
10644 if ((chunk != NULL) && (size >= 4))
10645 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10646
10647 buf = xmlAllocParserInputBuffer(enc);
10648 if (buf == NULL) return(NULL);
10649
10650 ctxt = xmlNewParserCtxt();
10651 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010652 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010653 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010654 return(NULL);
10655 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010656 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010657 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10658 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010659 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010660 xmlFreeParserInputBuffer(buf);
10661 xmlFreeParserCtxt(ctxt);
10662 return(NULL);
10663 }
Owen Taylor3473f882001-02-23 17:55:21 +000010664 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010665#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010666 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010667#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010668 xmlFree(ctxt->sax);
10669 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10670 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010671 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010672 xmlFreeParserInputBuffer(buf);
10673 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010674 return(NULL);
10675 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010676 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10677 if (sax->initialized == XML_SAX2_MAGIC)
10678 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10679 else
10680 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010681 if (user_data != NULL)
10682 ctxt->userData = user_data;
10683 }
10684 if (filename == NULL) {
10685 ctxt->directory = NULL;
10686 } else {
10687 ctxt->directory = xmlParserGetDirectory(filename);
10688 }
10689
10690 inputStream = xmlNewInputStream(ctxt);
10691 if (inputStream == NULL) {
10692 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010693 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010694 return(NULL);
10695 }
10696
10697 if (filename == NULL)
10698 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010699 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010700 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010701 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010702 if (inputStream->filename == NULL) {
10703 xmlFreeParserCtxt(ctxt);
10704 xmlFreeParserInputBuffer(buf);
10705 return(NULL);
10706 }
10707 }
Owen Taylor3473f882001-02-23 17:55:21 +000010708 inputStream->buf = buf;
10709 inputStream->base = inputStream->buf->buffer->content;
10710 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010711 inputStream->end =
10712 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010713
10714 inputPush(ctxt, inputStream);
10715
William M. Brack3a1cd212005-02-11 14:35:54 +000010716 /*
10717 * If the caller didn't provide an initial 'chunk' for determining
10718 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10719 * that it can be automatically determined later
10720 */
10721 if ((size == 0) || (chunk == NULL)) {
10722 ctxt->charset = XML_CHAR_ENCODING_NONE;
10723 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010724 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10725 int cur = ctxt->input->cur - ctxt->input->base;
10726
Owen Taylor3473f882001-02-23 17:55:21 +000010727 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010728
10729 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10730 ctxt->input->cur = ctxt->input->base + cur;
10731 ctxt->input->end =
10732 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010733#ifdef DEBUG_PUSH
10734 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10735#endif
10736 }
10737
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010738 if (enc != XML_CHAR_ENCODING_NONE) {
10739 xmlSwitchEncoding(ctxt, enc);
10740 }
10741
Owen Taylor3473f882001-02-23 17:55:21 +000010742 return(ctxt);
10743}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010744#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010745
10746/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010747 * xmlStopParser:
10748 * @ctxt: an XML parser context
10749 *
10750 * Blocks further parser processing
10751 */
10752void
10753xmlStopParser(xmlParserCtxtPtr ctxt) {
10754 if (ctxt == NULL)
10755 return;
10756 ctxt->instate = XML_PARSER_EOF;
10757 ctxt->disableSAX = 1;
10758 if (ctxt->input != NULL) {
10759 ctxt->input->cur = BAD_CAST"";
10760 ctxt->input->base = ctxt->input->cur;
10761 }
10762}
10763
10764/**
Owen Taylor3473f882001-02-23 17:55:21 +000010765 * xmlCreateIOParserCtxt:
10766 * @sax: a SAX handler
10767 * @user_data: The user data returned on SAX callbacks
10768 * @ioread: an I/O read function
10769 * @ioclose: an I/O close function
10770 * @ioctx: an I/O handler
10771 * @enc: the charset encoding if known
10772 *
10773 * Create a parser context for using the XML parser with an existing
10774 * I/O stream
10775 *
10776 * Returns the new parser context or NULL
10777 */
10778xmlParserCtxtPtr
10779xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10780 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10781 void *ioctx, xmlCharEncoding enc) {
10782 xmlParserCtxtPtr ctxt;
10783 xmlParserInputPtr inputStream;
10784 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010785
10786 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010787
10788 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10789 if (buf == NULL) return(NULL);
10790
10791 ctxt = xmlNewParserCtxt();
10792 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010793 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010794 return(NULL);
10795 }
10796 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010797#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010798 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010799#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010800 xmlFree(ctxt->sax);
10801 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10802 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010803 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010804 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010805 return(NULL);
10806 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010807 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10808 if (sax->initialized == XML_SAX2_MAGIC)
10809 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10810 else
10811 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010812 if (user_data != NULL)
10813 ctxt->userData = user_data;
10814 }
10815
10816 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10817 if (inputStream == NULL) {
10818 xmlFreeParserCtxt(ctxt);
10819 return(NULL);
10820 }
10821 inputPush(ctxt, inputStream);
10822
10823 return(ctxt);
10824}
10825
Daniel Veillard4432df22003-09-28 18:58:27 +000010826#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010827/************************************************************************
10828 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010829 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010830 * *
10831 ************************************************************************/
10832
10833/**
10834 * xmlIOParseDTD:
10835 * @sax: the SAX handler block or NULL
10836 * @input: an Input Buffer
10837 * @enc: the charset encoding if known
10838 *
10839 * Load and parse a DTD
10840 *
10841 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010842 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010843 */
10844
10845xmlDtdPtr
10846xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10847 xmlCharEncoding enc) {
10848 xmlDtdPtr ret = NULL;
10849 xmlParserCtxtPtr ctxt;
10850 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010851 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010852
10853 if (input == NULL)
10854 return(NULL);
10855
10856 ctxt = xmlNewParserCtxt();
10857 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010858 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010859 return(NULL);
10860 }
10861
10862 /*
10863 * Set-up the SAX context
10864 */
10865 if (sax != NULL) {
10866 if (ctxt->sax != NULL)
10867 xmlFree(ctxt->sax);
10868 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010869 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010870 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010871 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010872
10873 /*
10874 * generate a parser input from the I/O handler
10875 */
10876
Daniel Veillard43caefb2003-12-07 19:32:22 +000010877 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010878 if (pinput == NULL) {
10879 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010880 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010881 xmlFreeParserCtxt(ctxt);
10882 return(NULL);
10883 }
10884
10885 /*
10886 * plug some encoding conversion routines here.
10887 */
10888 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010889 if (enc != XML_CHAR_ENCODING_NONE) {
10890 xmlSwitchEncoding(ctxt, enc);
10891 }
Owen Taylor3473f882001-02-23 17:55:21 +000010892
10893 pinput->filename = NULL;
10894 pinput->line = 1;
10895 pinput->col = 1;
10896 pinput->base = ctxt->input->cur;
10897 pinput->cur = ctxt->input->cur;
10898 pinput->free = NULL;
10899
10900 /*
10901 * let's parse that entity knowing it's an external subset.
10902 */
10903 ctxt->inSubset = 2;
10904 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10905 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10906 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010907
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010908 if ((enc == XML_CHAR_ENCODING_NONE) &&
10909 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010910 /*
10911 * Get the 4 first bytes and decode the charset
10912 * if enc != XML_CHAR_ENCODING_NONE
10913 * plug some encoding conversion routines.
10914 */
10915 start[0] = RAW;
10916 start[1] = NXT(1);
10917 start[2] = NXT(2);
10918 start[3] = NXT(3);
10919 enc = xmlDetectCharEncoding(start, 4);
10920 if (enc != XML_CHAR_ENCODING_NONE) {
10921 xmlSwitchEncoding(ctxt, enc);
10922 }
10923 }
10924
Owen Taylor3473f882001-02-23 17:55:21 +000010925 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10926
10927 if (ctxt->myDoc != NULL) {
10928 if (ctxt->wellFormed) {
10929 ret = ctxt->myDoc->extSubset;
10930 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010931 if (ret != NULL) {
10932 xmlNodePtr tmp;
10933
10934 ret->doc = NULL;
10935 tmp = ret->children;
10936 while (tmp != NULL) {
10937 tmp->doc = NULL;
10938 tmp = tmp->next;
10939 }
10940 }
Owen Taylor3473f882001-02-23 17:55:21 +000010941 } else {
10942 ret = NULL;
10943 }
10944 xmlFreeDoc(ctxt->myDoc);
10945 ctxt->myDoc = NULL;
10946 }
10947 if (sax != NULL) ctxt->sax = NULL;
10948 xmlFreeParserCtxt(ctxt);
10949
10950 return(ret);
10951}
10952
10953/**
10954 * xmlSAXParseDTD:
10955 * @sax: the SAX handler block
10956 * @ExternalID: a NAME* containing the External ID of the DTD
10957 * @SystemID: a NAME* containing the URL to the DTD
10958 *
10959 * Load and parse an external subset.
10960 *
10961 * Returns the resulting xmlDtdPtr or NULL in case of error.
10962 */
10963
10964xmlDtdPtr
10965xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10966 const xmlChar *SystemID) {
10967 xmlDtdPtr ret = NULL;
10968 xmlParserCtxtPtr ctxt;
10969 xmlParserInputPtr input = NULL;
10970 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010971 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010972
10973 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10974
10975 ctxt = xmlNewParserCtxt();
10976 if (ctxt == NULL) {
10977 return(NULL);
10978 }
10979
10980 /*
10981 * Set-up the SAX context
10982 */
10983 if (sax != NULL) {
10984 if (ctxt->sax != NULL)
10985 xmlFree(ctxt->sax);
10986 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010987 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010988 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010989
10990 /*
10991 * Canonicalise the system ID
10992 */
10993 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010994 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010995 xmlFreeParserCtxt(ctxt);
10996 return(NULL);
10997 }
Owen Taylor3473f882001-02-23 17:55:21 +000010998
10999 /*
11000 * Ask the Entity resolver to load the damn thing
11001 */
11002
11003 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011004 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11005 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011006 if (input == NULL) {
11007 if (sax != NULL) ctxt->sax = NULL;
11008 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011009 if (systemIdCanonic != NULL)
11010 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011011 return(NULL);
11012 }
11013
11014 /*
11015 * plug some encoding conversion routines here.
11016 */
11017 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011018 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11019 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11020 xmlSwitchEncoding(ctxt, enc);
11021 }
Owen Taylor3473f882001-02-23 17:55:21 +000011022
11023 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011024 input->filename = (char *) systemIdCanonic;
11025 else
11026 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011027 input->line = 1;
11028 input->col = 1;
11029 input->base = ctxt->input->cur;
11030 input->cur = ctxt->input->cur;
11031 input->free = NULL;
11032
11033 /*
11034 * let's parse that entity knowing it's an external subset.
11035 */
11036 ctxt->inSubset = 2;
11037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11038 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11039 ExternalID, SystemID);
11040 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11041
11042 if (ctxt->myDoc != NULL) {
11043 if (ctxt->wellFormed) {
11044 ret = ctxt->myDoc->extSubset;
11045 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011046 if (ret != NULL) {
11047 xmlNodePtr tmp;
11048
11049 ret->doc = NULL;
11050 tmp = ret->children;
11051 while (tmp != NULL) {
11052 tmp->doc = NULL;
11053 tmp = tmp->next;
11054 }
11055 }
Owen Taylor3473f882001-02-23 17:55:21 +000011056 } else {
11057 ret = NULL;
11058 }
11059 xmlFreeDoc(ctxt->myDoc);
11060 ctxt->myDoc = NULL;
11061 }
11062 if (sax != NULL) ctxt->sax = NULL;
11063 xmlFreeParserCtxt(ctxt);
11064
11065 return(ret);
11066}
11067
Daniel Veillard4432df22003-09-28 18:58:27 +000011068
Owen Taylor3473f882001-02-23 17:55:21 +000011069/**
11070 * xmlParseDTD:
11071 * @ExternalID: a NAME* containing the External ID of the DTD
11072 * @SystemID: a NAME* containing the URL to the DTD
11073 *
11074 * Load and parse an external subset.
11075 *
11076 * Returns the resulting xmlDtdPtr or NULL in case of error.
11077 */
11078
11079xmlDtdPtr
11080xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11081 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11082}
Daniel Veillard4432df22003-09-28 18:58:27 +000011083#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011084
11085/************************************************************************
11086 * *
11087 * Front ends when parsing an Entity *
11088 * *
11089 ************************************************************************/
11090
11091/**
Owen Taylor3473f882001-02-23 17:55:21 +000011092 * xmlParseCtxtExternalEntity:
11093 * @ctx: the existing parsing context
11094 * @URL: the URL for the entity to load
11095 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011096 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011097 *
11098 * Parse an external general entity within an existing parsing context
11099 * An external general parsed entity is well-formed if it matches the
11100 * production labeled extParsedEnt.
11101 *
11102 * [78] extParsedEnt ::= TextDecl? content
11103 *
11104 * Returns 0 if the entity is well formed, -1 in case of args problem and
11105 * the parser error code otherwise
11106 */
11107
11108int
11109xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011110 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011111 xmlParserCtxtPtr ctxt;
11112 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011113 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011114 xmlSAXHandlerPtr oldsax = NULL;
11115 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011116 xmlChar start[4];
11117 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011118 xmlParserInputPtr inputStream;
11119 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011120
Daniel Veillardce682bc2004-11-05 17:22:25 +000011121 if (ctx == NULL) return(-1);
11122
Owen Taylor3473f882001-02-23 17:55:21 +000011123 if (ctx->depth > 40) {
11124 return(XML_ERR_ENTITY_LOOP);
11125 }
11126
Daniel Veillardcda96922001-08-21 10:56:31 +000011127 if (lst != NULL)
11128 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011129 if ((URL == NULL) && (ID == NULL))
11130 return(-1);
11131 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11132 return(-1);
11133
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011134 ctxt = xmlNewParserCtxt();
11135 if (ctxt == NULL) {
11136 return(-1);
11137 }
11138
Owen Taylor3473f882001-02-23 17:55:21 +000011139 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011140 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011141
11142 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11143 if (inputStream == NULL) {
11144 xmlFreeParserCtxt(ctxt);
11145 return(-1);
11146 }
11147
11148 inputPush(ctxt, inputStream);
11149
11150 if ((ctxt->directory == NULL) && (directory == NULL))
11151 directory = xmlParserGetDirectory((char *)URL);
11152 if ((ctxt->directory == NULL) && (directory != NULL))
11153 ctxt->directory = directory;
11154
Owen Taylor3473f882001-02-23 17:55:21 +000011155 oldsax = ctxt->sax;
11156 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011157 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011158 newDoc = xmlNewDoc(BAD_CAST "1.0");
11159 if (newDoc == NULL) {
11160 xmlFreeParserCtxt(ctxt);
11161 return(-1);
11162 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011163 if (ctx->myDoc->dict) {
11164 newDoc->dict = ctx->myDoc->dict;
11165 xmlDictReference(newDoc->dict);
11166 }
Owen Taylor3473f882001-02-23 17:55:21 +000011167 if (ctx->myDoc != NULL) {
11168 newDoc->intSubset = ctx->myDoc->intSubset;
11169 newDoc->extSubset = ctx->myDoc->extSubset;
11170 }
11171 if (ctx->myDoc->URL != NULL) {
11172 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11173 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011174 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11175 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011176 ctxt->sax = oldsax;
11177 xmlFreeParserCtxt(ctxt);
11178 newDoc->intSubset = NULL;
11179 newDoc->extSubset = NULL;
11180 xmlFreeDoc(newDoc);
11181 return(-1);
11182 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011183 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011184 nodePush(ctxt, newDoc->children);
11185 if (ctx->myDoc == NULL) {
11186 ctxt->myDoc = newDoc;
11187 } else {
11188 ctxt->myDoc = ctx->myDoc;
11189 newDoc->children->doc = ctx->myDoc;
11190 }
11191
Daniel Veillard87a764e2001-06-20 17:41:10 +000011192 /*
11193 * Get the 4 first bytes and decode the charset
11194 * if enc != XML_CHAR_ENCODING_NONE
11195 * plug some encoding conversion routines.
11196 */
11197 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011198 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11199 start[0] = RAW;
11200 start[1] = NXT(1);
11201 start[2] = NXT(2);
11202 start[3] = NXT(3);
11203 enc = xmlDetectCharEncoding(start, 4);
11204 if (enc != XML_CHAR_ENCODING_NONE) {
11205 xmlSwitchEncoding(ctxt, enc);
11206 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011207 }
11208
Owen Taylor3473f882001-02-23 17:55:21 +000011209 /*
11210 * Parse a possible text declaration first
11211 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011212 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011213 xmlParseTextDecl(ctxt);
11214 }
11215
11216 /*
11217 * Doing validity checking on chunk doesn't make sense
11218 */
11219 ctxt->instate = XML_PARSER_CONTENT;
11220 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011221 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011222 ctxt->loadsubset = ctx->loadsubset;
11223 ctxt->depth = ctx->depth + 1;
11224 ctxt->replaceEntities = ctx->replaceEntities;
11225 if (ctxt->validate) {
11226 ctxt->vctxt.error = ctx->vctxt.error;
11227 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011228 } else {
11229 ctxt->vctxt.error = NULL;
11230 ctxt->vctxt.warning = NULL;
11231 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011232 ctxt->vctxt.nodeTab = NULL;
11233 ctxt->vctxt.nodeNr = 0;
11234 ctxt->vctxt.nodeMax = 0;
11235 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011236 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11237 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011238 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11239 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11240 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011241 ctxt->dictNames = ctx->dictNames;
11242 ctxt->attsDefault = ctx->attsDefault;
11243 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011244 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011245
11246 xmlParseContent(ctxt);
11247
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011248 ctx->validate = ctxt->validate;
11249 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011250 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011251 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011252 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011253 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011254 }
11255 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011256 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011257 }
11258
11259 if (!ctxt->wellFormed) {
11260 if (ctxt->errNo == 0)
11261 ret = 1;
11262 else
11263 ret = ctxt->errNo;
11264 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011265 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011266 xmlNodePtr cur;
11267
11268 /*
11269 * Return the newly created nodeset after unlinking it from
11270 * they pseudo parent.
11271 */
11272 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011273 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011274 while (cur != NULL) {
11275 cur->parent = NULL;
11276 cur = cur->next;
11277 }
11278 newDoc->children->children = NULL;
11279 }
11280 ret = 0;
11281 }
11282 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011283 ctxt->dict = NULL;
11284 ctxt->attsDefault = NULL;
11285 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011286 xmlFreeParserCtxt(ctxt);
11287 newDoc->intSubset = NULL;
11288 newDoc->extSubset = NULL;
11289 xmlFreeDoc(newDoc);
11290
11291 return(ret);
11292}
11293
11294/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011295 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011296 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011297 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011298 * @sax: the SAX handler bloc (possibly NULL)
11299 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11300 * @depth: Used for loop detection, use 0
11301 * @URL: the URL for the entity to load
11302 * @ID: the System ID for the entity to load
11303 * @list: the return value for the set of parsed nodes
11304 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011305 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011306 *
11307 * Returns 0 if the entity is well formed, -1 in case of args problem and
11308 * the parser error code otherwise
11309 */
11310
Daniel Veillard7d515752003-09-26 19:12:37 +000011311static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011312xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11313 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011314 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011315 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011316 xmlParserCtxtPtr ctxt;
11317 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011318 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011319 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011320 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011321 xmlChar start[4];
11322 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011323
11324 if (depth > 40) {
11325 return(XML_ERR_ENTITY_LOOP);
11326 }
11327
11328
11329
11330 if (list != NULL)
11331 *list = NULL;
11332 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011333 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011334 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011335 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011336
11337
11338 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011339 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011340 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011341 if (oldctxt != NULL) {
11342 ctxt->_private = oldctxt->_private;
11343 ctxt->loadsubset = oldctxt->loadsubset;
11344 ctxt->validate = oldctxt->validate;
11345 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011346 ctxt->record_info = oldctxt->record_info;
11347 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11348 ctxt->node_seq.length = oldctxt->node_seq.length;
11349 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011350 } else {
11351 /*
11352 * Doing validity checking on chunk without context
11353 * doesn't make sense
11354 */
11355 ctxt->_private = NULL;
11356 ctxt->validate = 0;
11357 ctxt->external = 2;
11358 ctxt->loadsubset = 0;
11359 }
Owen Taylor3473f882001-02-23 17:55:21 +000011360 if (sax != NULL) {
11361 oldsax = ctxt->sax;
11362 ctxt->sax = sax;
11363 if (user_data != NULL)
11364 ctxt->userData = user_data;
11365 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011366 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011367 newDoc = xmlNewDoc(BAD_CAST "1.0");
11368 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011369 ctxt->node_seq.maximum = 0;
11370 ctxt->node_seq.length = 0;
11371 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011372 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011373 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011374 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011375 newDoc->intSubset = doc->intSubset;
11376 newDoc->extSubset = doc->extSubset;
11377 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011378 xmlDictReference(newDoc->dict);
11379
Owen Taylor3473f882001-02-23 17:55:21 +000011380 if (doc->URL != NULL) {
11381 newDoc->URL = xmlStrdup(doc->URL);
11382 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011383 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11384 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011385 if (sax != NULL)
11386 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011387 ctxt->node_seq.maximum = 0;
11388 ctxt->node_seq.length = 0;
11389 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011390 xmlFreeParserCtxt(ctxt);
11391 newDoc->intSubset = NULL;
11392 newDoc->extSubset = NULL;
11393 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011394 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011395 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011396 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011397 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011398 ctxt->myDoc = doc;
11399 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011400
Daniel Veillard87a764e2001-06-20 17:41:10 +000011401 /*
11402 * Get the 4 first bytes and decode the charset
11403 * if enc != XML_CHAR_ENCODING_NONE
11404 * plug some encoding conversion routines.
11405 */
11406 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011407 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11408 start[0] = RAW;
11409 start[1] = NXT(1);
11410 start[2] = NXT(2);
11411 start[3] = NXT(3);
11412 enc = xmlDetectCharEncoding(start, 4);
11413 if (enc != XML_CHAR_ENCODING_NONE) {
11414 xmlSwitchEncoding(ctxt, enc);
11415 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011416 }
11417
Owen Taylor3473f882001-02-23 17:55:21 +000011418 /*
11419 * Parse a possible text declaration first
11420 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011421 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011422 xmlParseTextDecl(ctxt);
11423 }
11424
Owen Taylor3473f882001-02-23 17:55:21 +000011425 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011426 ctxt->depth = depth;
11427
11428 xmlParseContent(ctxt);
11429
Daniel Veillard561b7f82002-03-20 21:55:57 +000011430 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011431 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011432 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011433 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011434 }
11435 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011437 }
11438
11439 if (!ctxt->wellFormed) {
11440 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011441 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011442 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011443 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011444 } else {
11445 if (list != NULL) {
11446 xmlNodePtr cur;
11447
11448 /*
11449 * Return the newly created nodeset after unlinking it from
11450 * they pseudo parent.
11451 */
11452 cur = newDoc->children->children;
11453 *list = cur;
11454 while (cur != NULL) {
11455 cur->parent = NULL;
11456 cur = cur->next;
11457 }
11458 newDoc->children->children = NULL;
11459 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011460 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011461 }
11462 if (sax != NULL)
11463 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011464 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11465 oldctxt->node_seq.length = ctxt->node_seq.length;
11466 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011467 ctxt->node_seq.maximum = 0;
11468 ctxt->node_seq.length = 0;
11469 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011470 xmlFreeParserCtxt(ctxt);
11471 newDoc->intSubset = NULL;
11472 newDoc->extSubset = NULL;
11473 xmlFreeDoc(newDoc);
11474
11475 return(ret);
11476}
11477
Daniel Veillard81273902003-09-30 00:43:48 +000011478#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011479/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011480 * xmlParseExternalEntity:
11481 * @doc: the document the chunk pertains to
11482 * @sax: the SAX handler bloc (possibly NULL)
11483 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11484 * @depth: Used for loop detection, use 0
11485 * @URL: the URL for the entity to load
11486 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011487 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011488 *
11489 * Parse an external general entity
11490 * An external general parsed entity is well-formed if it matches the
11491 * production labeled extParsedEnt.
11492 *
11493 * [78] extParsedEnt ::= TextDecl? content
11494 *
11495 * Returns 0 if the entity is well formed, -1 in case of args problem and
11496 * the parser error code otherwise
11497 */
11498
11499int
11500xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011501 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011502 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011503 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011504}
11505
11506/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011507 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011508 * @doc: the document the chunk pertains to
11509 * @sax: the SAX handler bloc (possibly NULL)
11510 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11511 * @depth: Used for loop detection, use 0
11512 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011513 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011514 *
11515 * Parse a well-balanced chunk of an XML document
11516 * called by the parser
11517 * The allowed sequence for the Well Balanced Chunk is the one defined by
11518 * the content production in the XML grammar:
11519 *
11520 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11521 *
11522 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11523 * the parser error code otherwise
11524 */
11525
11526int
11527xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011528 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011529 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11530 depth, string, lst, 0 );
11531}
Daniel Veillard81273902003-09-30 00:43:48 +000011532#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011533
11534/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011535 * xmlParseBalancedChunkMemoryInternal:
11536 * @oldctxt: the existing parsing context
11537 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11538 * @user_data: the user data field for the parser context
11539 * @lst: the return value for the set of parsed nodes
11540 *
11541 *
11542 * Parse a well-balanced chunk of an XML document
11543 * called by the parser
11544 * The allowed sequence for the Well Balanced Chunk is the one defined by
11545 * the content production in the XML grammar:
11546 *
11547 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11548 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011549 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11550 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011551 *
11552 * In case recover is set to 1, the nodelist will not be empty even if
11553 * the parsed chunk is not well balanced.
11554 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011555static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011556xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11557 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11558 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011559 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011560 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011561 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011562 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011563 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011564 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011565 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011566
11567 if (oldctxt->depth > 40) {
11568 return(XML_ERR_ENTITY_LOOP);
11569 }
11570
11571
11572 if (lst != NULL)
11573 *lst = NULL;
11574 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011575 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011576
11577 size = xmlStrlen(string);
11578
11579 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011580 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011581 if (user_data != NULL)
11582 ctxt->userData = user_data;
11583 else
11584 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011585 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11586 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011587 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11588 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11589 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011590
11591 oldsax = ctxt->sax;
11592 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011593 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011594 ctxt->replaceEntities = oldctxt->replaceEntities;
11595 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011596
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011597 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011598 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011599 newDoc = xmlNewDoc(BAD_CAST "1.0");
11600 if (newDoc == NULL) {
11601 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011602 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011603 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011604 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011605 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011606 newDoc->dict = ctxt->dict;
11607 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011608 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011609 } else {
11610 ctxt->myDoc = oldctxt->myDoc;
11611 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011612 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011613 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011614 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11615 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011616 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011617 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011618 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011619 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011620 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011621 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011622 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011623 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011624 ctxt->myDoc->children = NULL;
11625 ctxt->myDoc->last = NULL;
11626 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011627 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011628 ctxt->instate = XML_PARSER_CONTENT;
11629 ctxt->depth = oldctxt->depth + 1;
11630
Daniel Veillard328f48c2002-11-15 15:24:34 +000011631 ctxt->validate = 0;
11632 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011633 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11634 /*
11635 * ID/IDREF registration will be done in xmlValidateElement below
11636 */
11637 ctxt->loadsubset |= XML_SKIP_IDS;
11638 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011639 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011640 ctxt->attsDefault = oldctxt->attsDefault;
11641 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011642
Daniel Veillard68e9e742002-11-16 15:35:11 +000011643 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011644 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011645 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011646 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011647 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011648 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011649 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011651 }
11652
11653 if (!ctxt->wellFormed) {
11654 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011655 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011656 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011657 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011658 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011659 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011660 }
11661
William M. Brack7b9154b2003-09-27 19:23:50 +000011662 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011663 xmlNodePtr cur;
11664
11665 /*
11666 * Return the newly created nodeset after unlinking it from
11667 * they pseudo parent.
11668 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011669 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011670 *lst = cur;
11671 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011672#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011673 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11674 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11675 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011676 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11677 oldctxt->myDoc, cur);
11678 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011679#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011680 cur->parent = NULL;
11681 cur = cur->next;
11682 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011683 ctxt->myDoc->children->children = NULL;
11684 }
11685 if (ctxt->myDoc != NULL) {
11686 xmlFreeNode(ctxt->myDoc->children);
11687 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011688 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011689 }
11690
11691 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011692 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011693 ctxt->attsDefault = NULL;
11694 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011695 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011696 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011697 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011698 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011699
11700 return(ret);
11701}
11702
Daniel Veillard29b17482004-08-16 00:39:03 +000011703/**
11704 * xmlParseInNodeContext:
11705 * @node: the context node
11706 * @data: the input string
11707 * @datalen: the input string length in bytes
11708 * @options: a combination of xmlParserOption
11709 * @lst: the return value for the set of parsed nodes
11710 *
11711 * Parse a well-balanced chunk of an XML document
11712 * within the context (DTD, namespaces, etc ...) of the given node.
11713 *
11714 * The allowed sequence for the data is a Well Balanced Chunk defined by
11715 * the content production in the XML grammar:
11716 *
11717 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11718 *
11719 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11720 * error code otherwise
11721 */
11722xmlParserErrors
11723xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11724 int options, xmlNodePtr *lst) {
11725#ifdef SAX2
11726 xmlParserCtxtPtr ctxt;
11727 xmlDocPtr doc = NULL;
11728 xmlNodePtr fake, cur;
11729 int nsnr = 0;
11730
11731 xmlParserErrors ret = XML_ERR_OK;
11732
11733 /*
11734 * check all input parameters, grab the document
11735 */
11736 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11737 return(XML_ERR_INTERNAL_ERROR);
11738 switch (node->type) {
11739 case XML_ELEMENT_NODE:
11740 case XML_ATTRIBUTE_NODE:
11741 case XML_TEXT_NODE:
11742 case XML_CDATA_SECTION_NODE:
11743 case XML_ENTITY_REF_NODE:
11744 case XML_PI_NODE:
11745 case XML_COMMENT_NODE:
11746 case XML_DOCUMENT_NODE:
11747 case XML_HTML_DOCUMENT_NODE:
11748 break;
11749 default:
11750 return(XML_ERR_INTERNAL_ERROR);
11751
11752 }
11753 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11754 (node->type != XML_DOCUMENT_NODE) &&
11755 (node->type != XML_HTML_DOCUMENT_NODE))
11756 node = node->parent;
11757 if (node == NULL)
11758 return(XML_ERR_INTERNAL_ERROR);
11759 if (node->type == XML_ELEMENT_NODE)
11760 doc = node->doc;
11761 else
11762 doc = (xmlDocPtr) node;
11763 if (doc == NULL)
11764 return(XML_ERR_INTERNAL_ERROR);
11765
11766 /*
11767 * allocate a context and set-up everything not related to the
11768 * node position in the tree
11769 */
11770 if (doc->type == XML_DOCUMENT_NODE)
11771 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11772#ifdef LIBXML_HTML_ENABLED
11773 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11774 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11775#endif
11776 else
11777 return(XML_ERR_INTERNAL_ERROR);
11778
11779 if (ctxt == NULL)
11780 return(XML_ERR_NO_MEMORY);
11781 fake = xmlNewComment(NULL);
11782 if (fake == NULL) {
11783 xmlFreeParserCtxt(ctxt);
11784 return(XML_ERR_NO_MEMORY);
11785 }
11786 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011787
11788 /*
11789 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11790 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11791 * we must wait until the last moment to free the original one.
11792 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011793 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011794 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011795 xmlDictFree(ctxt->dict);
11796 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011797 } else
11798 options |= XML_PARSE_NODICT;
11799
11800 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011801 xmlDetectSAX2(ctxt);
11802 ctxt->myDoc = doc;
11803
11804 if (node->type == XML_ELEMENT_NODE) {
11805 nodePush(ctxt, node);
11806 /*
11807 * initialize the SAX2 namespaces stack
11808 */
11809 cur = node;
11810 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11811 xmlNsPtr ns = cur->nsDef;
11812 const xmlChar *iprefix, *ihref;
11813
11814 while (ns != NULL) {
11815 if (ctxt->dict) {
11816 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11817 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11818 } else {
11819 iprefix = ns->prefix;
11820 ihref = ns->href;
11821 }
11822
11823 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11824 nsPush(ctxt, iprefix, ihref);
11825 nsnr++;
11826 }
11827 ns = ns->next;
11828 }
11829 cur = cur->parent;
11830 }
11831 ctxt->instate = XML_PARSER_CONTENT;
11832 }
11833
11834 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11835 /*
11836 * ID/IDREF registration will be done in xmlValidateElement below
11837 */
11838 ctxt->loadsubset |= XML_SKIP_IDS;
11839 }
11840
Daniel Veillard499cc922006-01-18 17:22:35 +000011841#ifdef LIBXML_HTML_ENABLED
11842 if (doc->type == XML_HTML_DOCUMENT_NODE)
11843 __htmlParseContent(ctxt);
11844 else
11845#endif
11846 xmlParseContent(ctxt);
11847
Daniel Veillard29b17482004-08-16 00:39:03 +000011848 nsPop(ctxt, nsnr);
11849 if ((RAW == '<') && (NXT(1) == '/')) {
11850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11851 } else if (RAW != 0) {
11852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11853 }
11854 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11856 ctxt->wellFormed = 0;
11857 }
11858
11859 if (!ctxt->wellFormed) {
11860 if (ctxt->errNo == 0)
11861 ret = XML_ERR_INTERNAL_ERROR;
11862 else
11863 ret = (xmlParserErrors)ctxt->errNo;
11864 } else {
11865 ret = XML_ERR_OK;
11866 }
11867
11868 /*
11869 * Return the newly created nodeset after unlinking it from
11870 * the pseudo sibling.
11871 */
11872
11873 cur = fake->next;
11874 fake->next = NULL;
11875 node->last = fake;
11876
11877 if (cur != NULL) {
11878 cur->prev = NULL;
11879 }
11880
11881 *lst = cur;
11882
11883 while (cur != NULL) {
11884 cur->parent = NULL;
11885 cur = cur->next;
11886 }
11887
11888 xmlUnlinkNode(fake);
11889 xmlFreeNode(fake);
11890
11891
11892 if (ret != XML_ERR_OK) {
11893 xmlFreeNodeList(*lst);
11894 *lst = NULL;
11895 }
William M. Brackc3f81342004-10-03 01:22:44 +000011896
William M. Brackb7b54de2004-10-06 16:38:01 +000011897 if (doc->dict != NULL)
11898 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011899 xmlFreeParserCtxt(ctxt);
11900
11901 return(ret);
11902#else /* !SAX2 */
11903 return(XML_ERR_INTERNAL_ERROR);
11904#endif
11905}
11906
Daniel Veillard81273902003-09-30 00:43:48 +000011907#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011908/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011909 * xmlParseBalancedChunkMemoryRecover:
11910 * @doc: the document the chunk pertains to
11911 * @sax: the SAX handler bloc (possibly NULL)
11912 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11913 * @depth: Used for loop detection, use 0
11914 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11915 * @lst: the return value for the set of parsed nodes
11916 * @recover: return nodes even if the data is broken (use 0)
11917 *
11918 *
11919 * Parse a well-balanced chunk of an XML document
11920 * called by the parser
11921 * The allowed sequence for the Well Balanced Chunk is the one defined by
11922 * the content production in the XML grammar:
11923 *
11924 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11925 *
11926 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11927 * the parser error code otherwise
11928 *
11929 * In case recover is set to 1, the nodelist will not be empty even if
11930 * the parsed chunk is not well balanced.
11931 */
11932int
11933xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11934 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11935 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011936 xmlParserCtxtPtr ctxt;
11937 xmlDocPtr newDoc;
11938 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011939 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011940 int size;
11941 int ret = 0;
11942
11943 if (depth > 40) {
11944 return(XML_ERR_ENTITY_LOOP);
11945 }
11946
11947
Daniel Veillardcda96922001-08-21 10:56:31 +000011948 if (lst != NULL)
11949 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011950 if (string == NULL)
11951 return(-1);
11952
11953 size = xmlStrlen(string);
11954
11955 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11956 if (ctxt == NULL) return(-1);
11957 ctxt->userData = ctxt;
11958 if (sax != NULL) {
11959 oldsax = ctxt->sax;
11960 ctxt->sax = sax;
11961 if (user_data != NULL)
11962 ctxt->userData = user_data;
11963 }
11964 newDoc = xmlNewDoc(BAD_CAST "1.0");
11965 if (newDoc == NULL) {
11966 xmlFreeParserCtxt(ctxt);
11967 return(-1);
11968 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011969 if ((doc != NULL) && (doc->dict != NULL)) {
11970 xmlDictFree(ctxt->dict);
11971 ctxt->dict = doc->dict;
11972 xmlDictReference(ctxt->dict);
11973 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11974 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11975 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11976 ctxt->dictNames = 1;
11977 } else {
11978 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11979 }
Owen Taylor3473f882001-02-23 17:55:21 +000011980 if (doc != NULL) {
11981 newDoc->intSubset = doc->intSubset;
11982 newDoc->extSubset = doc->extSubset;
11983 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011984 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11985 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011986 if (sax != NULL)
11987 ctxt->sax = oldsax;
11988 xmlFreeParserCtxt(ctxt);
11989 newDoc->intSubset = NULL;
11990 newDoc->extSubset = NULL;
11991 xmlFreeDoc(newDoc);
11992 return(-1);
11993 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011994 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11995 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011996 if (doc == NULL) {
11997 ctxt->myDoc = newDoc;
11998 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011999 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012000 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012001 /* Ensure that doc has XML spec namespace */
12002 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12003 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012004 }
12005 ctxt->instate = XML_PARSER_CONTENT;
12006 ctxt->depth = depth;
12007
12008 /*
12009 * Doing validity checking on chunk doesn't make sense
12010 */
12011 ctxt->validate = 0;
12012 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012013 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012014
Daniel Veillardb39bc392002-10-26 19:29:51 +000012015 if ( doc != NULL ){
12016 content = doc->children;
12017 doc->children = NULL;
12018 xmlParseContent(ctxt);
12019 doc->children = content;
12020 }
12021 else {
12022 xmlParseContent(ctxt);
12023 }
Owen Taylor3473f882001-02-23 17:55:21 +000012024 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012025 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012026 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012027 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012028 }
12029 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012030 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012031 }
12032
12033 if (!ctxt->wellFormed) {
12034 if (ctxt->errNo == 0)
12035 ret = 1;
12036 else
12037 ret = ctxt->errNo;
12038 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012039 ret = 0;
12040 }
12041
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012042 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12043 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012044
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012045 /*
12046 * Return the newly created nodeset after unlinking it from
12047 * they pseudo parent.
12048 */
12049 cur = newDoc->children->children;
12050 *lst = cur;
12051 while (cur != NULL) {
12052 xmlSetTreeDoc(cur, doc);
12053 cur->parent = NULL;
12054 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012055 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012056 newDoc->children->children = NULL;
12057 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012058
Owen Taylor3473f882001-02-23 17:55:21 +000012059 if (sax != NULL)
12060 ctxt->sax = oldsax;
12061 xmlFreeParserCtxt(ctxt);
12062 newDoc->intSubset = NULL;
12063 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012064 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012065 xmlFreeDoc(newDoc);
12066
12067 return(ret);
12068}
12069
12070/**
12071 * xmlSAXParseEntity:
12072 * @sax: the SAX handler block
12073 * @filename: the filename
12074 *
12075 * parse an XML external entity out of context and build a tree.
12076 * It use the given SAX function block to handle the parsing callback.
12077 * If sax is NULL, fallback to the default DOM tree building routines.
12078 *
12079 * [78] extParsedEnt ::= TextDecl? content
12080 *
12081 * This correspond to a "Well Balanced" chunk
12082 *
12083 * Returns the resulting document tree
12084 */
12085
12086xmlDocPtr
12087xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12088 xmlDocPtr ret;
12089 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012090
12091 ctxt = xmlCreateFileParserCtxt(filename);
12092 if (ctxt == NULL) {
12093 return(NULL);
12094 }
12095 if (sax != NULL) {
12096 if (ctxt->sax != NULL)
12097 xmlFree(ctxt->sax);
12098 ctxt->sax = sax;
12099 ctxt->userData = NULL;
12100 }
12101
Owen Taylor3473f882001-02-23 17:55:21 +000012102 xmlParseExtParsedEnt(ctxt);
12103
12104 if (ctxt->wellFormed)
12105 ret = ctxt->myDoc;
12106 else {
12107 ret = NULL;
12108 xmlFreeDoc(ctxt->myDoc);
12109 ctxt->myDoc = NULL;
12110 }
12111 if (sax != NULL)
12112 ctxt->sax = NULL;
12113 xmlFreeParserCtxt(ctxt);
12114
12115 return(ret);
12116}
12117
12118/**
12119 * xmlParseEntity:
12120 * @filename: the filename
12121 *
12122 * parse an XML external entity out of context and build a tree.
12123 *
12124 * [78] extParsedEnt ::= TextDecl? content
12125 *
12126 * This correspond to a "Well Balanced" chunk
12127 *
12128 * Returns the resulting document tree
12129 */
12130
12131xmlDocPtr
12132xmlParseEntity(const char *filename) {
12133 return(xmlSAXParseEntity(NULL, filename));
12134}
Daniel Veillard81273902003-09-30 00:43:48 +000012135#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012136
12137/**
12138 * xmlCreateEntityParserCtxt:
12139 * @URL: the entity URL
12140 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012141 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012142 *
12143 * Create a parser context for an external entity
12144 * Automatic support for ZLIB/Compress compressed document is provided
12145 * by default if found at compile-time.
12146 *
12147 * Returns the new parser context or NULL
12148 */
12149xmlParserCtxtPtr
12150xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12151 const xmlChar *base) {
12152 xmlParserCtxtPtr ctxt;
12153 xmlParserInputPtr inputStream;
12154 char *directory = NULL;
12155 xmlChar *uri;
12156
12157 ctxt = xmlNewParserCtxt();
12158 if (ctxt == NULL) {
12159 return(NULL);
12160 }
12161
12162 uri = xmlBuildURI(URL, base);
12163
12164 if (uri == NULL) {
12165 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12166 if (inputStream == NULL) {
12167 xmlFreeParserCtxt(ctxt);
12168 return(NULL);
12169 }
12170
12171 inputPush(ctxt, inputStream);
12172
12173 if ((ctxt->directory == NULL) && (directory == NULL))
12174 directory = xmlParserGetDirectory((char *)URL);
12175 if ((ctxt->directory == NULL) && (directory != NULL))
12176 ctxt->directory = directory;
12177 } else {
12178 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12179 if (inputStream == NULL) {
12180 xmlFree(uri);
12181 xmlFreeParserCtxt(ctxt);
12182 return(NULL);
12183 }
12184
12185 inputPush(ctxt, inputStream);
12186
12187 if ((ctxt->directory == NULL) && (directory == NULL))
12188 directory = xmlParserGetDirectory((char *)uri);
12189 if ((ctxt->directory == NULL) && (directory != NULL))
12190 ctxt->directory = directory;
12191 xmlFree(uri);
12192 }
Owen Taylor3473f882001-02-23 17:55:21 +000012193 return(ctxt);
12194}
12195
12196/************************************************************************
12197 * *
12198 * Front ends when parsing from a file *
12199 * *
12200 ************************************************************************/
12201
12202/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012203 * xmlCreateURLParserCtxt:
12204 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012205 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012206 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012207 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012208 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012209 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012210 *
12211 * Returns the new parser context or NULL
12212 */
12213xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012214xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012215{
12216 xmlParserCtxtPtr ctxt;
12217 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012218 char *directory = NULL;
12219
Owen Taylor3473f882001-02-23 17:55:21 +000012220 ctxt = xmlNewParserCtxt();
12221 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012222 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012223 return(NULL);
12224 }
12225
Daniel Veillarddf292f72005-01-16 19:00:15 +000012226 if (options)
12227 xmlCtxtUseOptions(ctxt, options);
12228 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012229
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012230 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012231 if (inputStream == NULL) {
12232 xmlFreeParserCtxt(ctxt);
12233 return(NULL);
12234 }
12235
Owen Taylor3473f882001-02-23 17:55:21 +000012236 inputPush(ctxt, inputStream);
12237 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012238 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012239 if ((ctxt->directory == NULL) && (directory != NULL))
12240 ctxt->directory = directory;
12241
12242 return(ctxt);
12243}
12244
Daniel Veillard61b93382003-11-03 14:28:31 +000012245/**
12246 * xmlCreateFileParserCtxt:
12247 * @filename: the filename
12248 *
12249 * Create a parser context for a file content.
12250 * Automatic support for ZLIB/Compress compressed document is provided
12251 * by default if found at compile-time.
12252 *
12253 * Returns the new parser context or NULL
12254 */
12255xmlParserCtxtPtr
12256xmlCreateFileParserCtxt(const char *filename)
12257{
12258 return(xmlCreateURLParserCtxt(filename, 0));
12259}
12260
Daniel Veillard81273902003-09-30 00:43:48 +000012261#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012262/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012263 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012264 * @sax: the SAX handler block
12265 * @filename: the filename
12266 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12267 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012268 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012269 *
12270 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12271 * compressed document is provided by default if found at compile-time.
12272 * It use the given SAX function block to handle the parsing callback.
12273 * If sax is NULL, fallback to the default DOM tree building routines.
12274 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012275 * User data (void *) is stored within the parser context in the
12276 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012277 *
Owen Taylor3473f882001-02-23 17:55:21 +000012278 * Returns the resulting document tree
12279 */
12280
12281xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012282xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12283 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012284 xmlDocPtr ret;
12285 xmlParserCtxtPtr ctxt;
12286 char *directory = NULL;
12287
Daniel Veillard635ef722001-10-29 11:48:19 +000012288 xmlInitParser();
12289
Owen Taylor3473f882001-02-23 17:55:21 +000012290 ctxt = xmlCreateFileParserCtxt(filename);
12291 if (ctxt == NULL) {
12292 return(NULL);
12293 }
12294 if (sax != NULL) {
12295 if (ctxt->sax != NULL)
12296 xmlFree(ctxt->sax);
12297 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012298 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012299 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012300 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012301 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012302 }
Owen Taylor3473f882001-02-23 17:55:21 +000012303
12304 if ((ctxt->directory == NULL) && (directory == NULL))
12305 directory = xmlParserGetDirectory(filename);
12306 if ((ctxt->directory == NULL) && (directory != NULL))
12307 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12308
Daniel Veillarddad3f682002-11-17 16:47:27 +000012309 ctxt->recovery = recovery;
12310
Owen Taylor3473f882001-02-23 17:55:21 +000012311 xmlParseDocument(ctxt);
12312
William M. Brackc07329e2003-09-08 01:57:30 +000012313 if ((ctxt->wellFormed) || recovery) {
12314 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012315 if (ret != NULL) {
12316 if (ctxt->input->buf->compressed > 0)
12317 ret->compression = 9;
12318 else
12319 ret->compression = ctxt->input->buf->compressed;
12320 }
William M. Brackc07329e2003-09-08 01:57:30 +000012321 }
Owen Taylor3473f882001-02-23 17:55:21 +000012322 else {
12323 ret = NULL;
12324 xmlFreeDoc(ctxt->myDoc);
12325 ctxt->myDoc = NULL;
12326 }
12327 if (sax != NULL)
12328 ctxt->sax = NULL;
12329 xmlFreeParserCtxt(ctxt);
12330
12331 return(ret);
12332}
12333
12334/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012335 * xmlSAXParseFile:
12336 * @sax: the SAX handler block
12337 * @filename: the filename
12338 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12339 * documents
12340 *
12341 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12342 * compressed document is provided by default if found at compile-time.
12343 * It use the given SAX function block to handle the parsing callback.
12344 * If sax is NULL, fallback to the default DOM tree building routines.
12345 *
12346 * Returns the resulting document tree
12347 */
12348
12349xmlDocPtr
12350xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12351 int recovery) {
12352 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12353}
12354
12355/**
Owen Taylor3473f882001-02-23 17:55:21 +000012356 * xmlRecoverDoc:
12357 * @cur: a pointer to an array of xmlChar
12358 *
12359 * parse an XML in-memory document and build a tree.
12360 * In the case the document is not Well Formed, a tree is built anyway
12361 *
12362 * Returns the resulting document tree
12363 */
12364
12365xmlDocPtr
12366xmlRecoverDoc(xmlChar *cur) {
12367 return(xmlSAXParseDoc(NULL, cur, 1));
12368}
12369
12370/**
12371 * xmlParseFile:
12372 * @filename: the filename
12373 *
12374 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12375 * compressed document is provided by default if found at compile-time.
12376 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012377 * Returns the resulting document tree if the file was wellformed,
12378 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012379 */
12380
12381xmlDocPtr
12382xmlParseFile(const char *filename) {
12383 return(xmlSAXParseFile(NULL, filename, 0));
12384}
12385
12386/**
12387 * xmlRecoverFile:
12388 * @filename: the filename
12389 *
12390 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12391 * compressed document is provided by default if found at compile-time.
12392 * In the case the document is not Well Formed, a tree is built anyway
12393 *
12394 * Returns the resulting document tree
12395 */
12396
12397xmlDocPtr
12398xmlRecoverFile(const char *filename) {
12399 return(xmlSAXParseFile(NULL, filename, 1));
12400}
12401
12402
12403/**
12404 * xmlSetupParserForBuffer:
12405 * @ctxt: an XML parser context
12406 * @buffer: a xmlChar * buffer
12407 * @filename: a file name
12408 *
12409 * Setup the parser context to parse a new buffer; Clears any prior
12410 * contents from the parser context. The buffer parameter must not be
12411 * NULL, but the filename parameter can be
12412 */
12413void
12414xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12415 const char* filename)
12416{
12417 xmlParserInputPtr input;
12418
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012419 if ((ctxt == NULL) || (buffer == NULL))
12420 return;
12421
Owen Taylor3473f882001-02-23 17:55:21 +000012422 input = xmlNewInputStream(ctxt);
12423 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012424 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012425 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012426 return;
12427 }
12428
12429 xmlClearParserCtxt(ctxt);
12430 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012431 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012432 input->base = buffer;
12433 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012434 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012435 inputPush(ctxt, input);
12436}
12437
12438/**
12439 * xmlSAXUserParseFile:
12440 * @sax: a SAX handler
12441 * @user_data: The user data returned on SAX callbacks
12442 * @filename: a file name
12443 *
12444 * parse an XML file and call the given SAX handler routines.
12445 * Automatic support for ZLIB/Compress compressed document is provided
12446 *
12447 * Returns 0 in case of success or a error number otherwise
12448 */
12449int
12450xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12451 const char *filename) {
12452 int ret = 0;
12453 xmlParserCtxtPtr ctxt;
12454
12455 ctxt = xmlCreateFileParserCtxt(filename);
12456 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012457#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012458 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012459#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012460 xmlFree(ctxt->sax);
12461 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012462 xmlDetectSAX2(ctxt);
12463
Owen Taylor3473f882001-02-23 17:55:21 +000012464 if (user_data != NULL)
12465 ctxt->userData = user_data;
12466
12467 xmlParseDocument(ctxt);
12468
12469 if (ctxt->wellFormed)
12470 ret = 0;
12471 else {
12472 if (ctxt->errNo != 0)
12473 ret = ctxt->errNo;
12474 else
12475 ret = -1;
12476 }
12477 if (sax != NULL)
12478 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012479 if (ctxt->myDoc != NULL) {
12480 xmlFreeDoc(ctxt->myDoc);
12481 ctxt->myDoc = NULL;
12482 }
Owen Taylor3473f882001-02-23 17:55:21 +000012483 xmlFreeParserCtxt(ctxt);
12484
12485 return ret;
12486}
Daniel Veillard81273902003-09-30 00:43:48 +000012487#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012488
12489/************************************************************************
12490 * *
12491 * Front ends when parsing from memory *
12492 * *
12493 ************************************************************************/
12494
12495/**
12496 * xmlCreateMemoryParserCtxt:
12497 * @buffer: a pointer to a char array
12498 * @size: the size of the array
12499 *
12500 * Create a parser context for an XML in-memory document.
12501 *
12502 * Returns the new parser context or NULL
12503 */
12504xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012505xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012506 xmlParserCtxtPtr ctxt;
12507 xmlParserInputPtr input;
12508 xmlParserInputBufferPtr buf;
12509
12510 if (buffer == NULL)
12511 return(NULL);
12512 if (size <= 0)
12513 return(NULL);
12514
12515 ctxt = xmlNewParserCtxt();
12516 if (ctxt == NULL)
12517 return(NULL);
12518
Daniel Veillard53350552003-09-18 13:35:51 +000012519 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012520 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012521 if (buf == NULL) {
12522 xmlFreeParserCtxt(ctxt);
12523 return(NULL);
12524 }
Owen Taylor3473f882001-02-23 17:55:21 +000012525
12526 input = xmlNewInputStream(ctxt);
12527 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012528 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012529 xmlFreeParserCtxt(ctxt);
12530 return(NULL);
12531 }
12532
12533 input->filename = NULL;
12534 input->buf = buf;
12535 input->base = input->buf->buffer->content;
12536 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012537 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012538
12539 inputPush(ctxt, input);
12540 return(ctxt);
12541}
12542
Daniel Veillard81273902003-09-30 00:43:48 +000012543#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012544/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012545 * xmlSAXParseMemoryWithData:
12546 * @sax: the SAX handler block
12547 * @buffer: an pointer to a char array
12548 * @size: the size of the array
12549 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12550 * documents
12551 * @data: the userdata
12552 *
12553 * parse an XML in-memory block and use the given SAX function block
12554 * to handle the parsing callback. If sax is NULL, fallback to the default
12555 * DOM tree building routines.
12556 *
12557 * User data (void *) is stored within the parser context in the
12558 * context's _private member, so it is available nearly everywhere in libxml
12559 *
12560 * Returns the resulting document tree
12561 */
12562
12563xmlDocPtr
12564xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12565 int size, int recovery, void *data) {
12566 xmlDocPtr ret;
12567 xmlParserCtxtPtr ctxt;
12568
12569 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12570 if (ctxt == NULL) return(NULL);
12571 if (sax != NULL) {
12572 if (ctxt->sax != NULL)
12573 xmlFree(ctxt->sax);
12574 ctxt->sax = sax;
12575 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012576 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012577 if (data!=NULL) {
12578 ctxt->_private=data;
12579 }
12580
Daniel Veillardadba5f12003-04-04 16:09:01 +000012581 ctxt->recovery = recovery;
12582
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012583 xmlParseDocument(ctxt);
12584
12585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12586 else {
12587 ret = NULL;
12588 xmlFreeDoc(ctxt->myDoc);
12589 ctxt->myDoc = NULL;
12590 }
12591 if (sax != NULL)
12592 ctxt->sax = NULL;
12593 xmlFreeParserCtxt(ctxt);
12594
12595 return(ret);
12596}
12597
12598/**
Owen Taylor3473f882001-02-23 17:55:21 +000012599 * xmlSAXParseMemory:
12600 * @sax: the SAX handler block
12601 * @buffer: an pointer to a char array
12602 * @size: the size of the array
12603 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12604 * documents
12605 *
12606 * parse an XML in-memory block and use the given SAX function block
12607 * to handle the parsing callback. If sax is NULL, fallback to the default
12608 * DOM tree building routines.
12609 *
12610 * Returns the resulting document tree
12611 */
12612xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012613xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12614 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012615 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012616}
12617
12618/**
12619 * xmlParseMemory:
12620 * @buffer: an pointer to a char array
12621 * @size: the size of the array
12622 *
12623 * parse an XML in-memory block and build a tree.
12624 *
12625 * Returns the resulting document tree
12626 */
12627
Daniel Veillard50822cb2001-07-26 20:05:51 +000012628xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012629 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12630}
12631
12632/**
12633 * xmlRecoverMemory:
12634 * @buffer: an pointer to a char array
12635 * @size: the size of the array
12636 *
12637 * parse an XML in-memory block and build a tree.
12638 * In the case the document is not Well Formed, a tree is built anyway
12639 *
12640 * Returns the resulting document tree
12641 */
12642
Daniel Veillard50822cb2001-07-26 20:05:51 +000012643xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012644 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12645}
12646
12647/**
12648 * xmlSAXUserParseMemory:
12649 * @sax: a SAX handler
12650 * @user_data: The user data returned on SAX callbacks
12651 * @buffer: an in-memory XML document input
12652 * @size: the length of the XML document in bytes
12653 *
12654 * A better SAX parsing routine.
12655 * parse an XML in-memory buffer and call the given SAX handler routines.
12656 *
12657 * Returns 0 in case of success or a error number otherwise
12658 */
12659int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012660 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012661 int ret = 0;
12662 xmlParserCtxtPtr ctxt;
12663 xmlSAXHandlerPtr oldsax = NULL;
12664
Daniel Veillard9e923512002-08-14 08:48:52 +000012665 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012666 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12667 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012668 oldsax = ctxt->sax;
12669 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012670 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012671 if (user_data != NULL)
12672 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012673
12674 xmlParseDocument(ctxt);
12675
12676 if (ctxt->wellFormed)
12677 ret = 0;
12678 else {
12679 if (ctxt->errNo != 0)
12680 ret = ctxt->errNo;
12681 else
12682 ret = -1;
12683 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012684 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012685 if (ctxt->myDoc != NULL) {
12686 xmlFreeDoc(ctxt->myDoc);
12687 ctxt->myDoc = NULL;
12688 }
Owen Taylor3473f882001-02-23 17:55:21 +000012689 xmlFreeParserCtxt(ctxt);
12690
12691 return ret;
12692}
Daniel Veillard81273902003-09-30 00:43:48 +000012693#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012694
12695/**
12696 * xmlCreateDocParserCtxt:
12697 * @cur: a pointer to an array of xmlChar
12698 *
12699 * Creates a parser context for an XML in-memory document.
12700 *
12701 * Returns the new parser context or NULL
12702 */
12703xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012704xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012705 int len;
12706
12707 if (cur == NULL)
12708 return(NULL);
12709 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012710 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012711}
12712
Daniel Veillard81273902003-09-30 00:43:48 +000012713#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012714/**
12715 * xmlSAXParseDoc:
12716 * @sax: the SAX handler block
12717 * @cur: a pointer to an array of xmlChar
12718 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12719 * documents
12720 *
12721 * parse an XML in-memory document and build a tree.
12722 * It use the given SAX function block to handle the parsing callback.
12723 * If sax is NULL, fallback to the default DOM tree building routines.
12724 *
12725 * Returns the resulting document tree
12726 */
12727
12728xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012729xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012730 xmlDocPtr ret;
12731 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012732 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012733
Daniel Veillard38936062004-11-04 17:45:11 +000012734 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012735
12736
12737 ctxt = xmlCreateDocParserCtxt(cur);
12738 if (ctxt == NULL) return(NULL);
12739 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012740 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012741 ctxt->sax = sax;
12742 ctxt->userData = NULL;
12743 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012744 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012745
12746 xmlParseDocument(ctxt);
12747 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12748 else {
12749 ret = NULL;
12750 xmlFreeDoc(ctxt->myDoc);
12751 ctxt->myDoc = NULL;
12752 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012753 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012754 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012755 xmlFreeParserCtxt(ctxt);
12756
12757 return(ret);
12758}
12759
12760/**
12761 * xmlParseDoc:
12762 * @cur: a pointer to an array of xmlChar
12763 *
12764 * parse an XML in-memory document and build a tree.
12765 *
12766 * Returns the resulting document tree
12767 */
12768
12769xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012770xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012771 return(xmlSAXParseDoc(NULL, cur, 0));
12772}
Daniel Veillard81273902003-09-30 00:43:48 +000012773#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012774
Daniel Veillard81273902003-09-30 00:43:48 +000012775#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012776/************************************************************************
12777 * *
12778 * Specific function to keep track of entities references *
12779 * and used by the XSLT debugger *
12780 * *
12781 ************************************************************************/
12782
12783static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12784
12785/**
12786 * xmlAddEntityReference:
12787 * @ent : A valid entity
12788 * @firstNode : A valid first node for children of entity
12789 * @lastNode : A valid last node of children entity
12790 *
12791 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12792 */
12793static void
12794xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12795 xmlNodePtr lastNode)
12796{
12797 if (xmlEntityRefFunc != NULL) {
12798 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12799 }
12800}
12801
12802
12803/**
12804 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012805 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012806 *
12807 * Set the function to call call back when a xml reference has been made
12808 */
12809void
12810xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12811{
12812 xmlEntityRefFunc = func;
12813}
Daniel Veillard81273902003-09-30 00:43:48 +000012814#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012815
12816/************************************************************************
12817 * *
12818 * Miscellaneous *
12819 * *
12820 ************************************************************************/
12821
12822#ifdef LIBXML_XPATH_ENABLED
12823#include <libxml/xpath.h>
12824#endif
12825
Daniel Veillardffa3c742005-07-21 13:24:09 +000012826extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012827static int xmlParserInitialized = 0;
12828
12829/**
12830 * xmlInitParser:
12831 *
12832 * Initialization function for the XML parser.
12833 * This is not reentrant. Call once before processing in case of
12834 * use in multithreaded programs.
12835 */
12836
12837void
12838xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012839 if (xmlParserInitialized != 0)
12840 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012841
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012842 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12843 (xmlGenericError == NULL))
12844 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012845 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012846 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012847 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012848 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012849 xmlDefaultSAXHandlerInit();
12850 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012851#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012852 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012853#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012854#ifdef LIBXML_HTML_ENABLED
12855 htmlInitAutoClose();
12856 htmlDefaultSAXHandlerInit();
12857#endif
12858#ifdef LIBXML_XPATH_ENABLED
12859 xmlXPathInit();
12860#endif
12861 xmlParserInitialized = 1;
12862}
12863
12864/**
12865 * xmlCleanupParser:
12866 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012867 * Cleanup function for the XML library. It tries to reclaim all
12868 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012869 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012870 * function should not prevent reusing the library but one should
12871 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012872 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012873 */
12874
12875void
12876xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012877 if (!xmlParserInitialized)
12878 return;
12879
Owen Taylor3473f882001-02-23 17:55:21 +000012880 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012881#ifdef LIBXML_CATALOG_ENABLED
12882 xmlCatalogCleanup();
12883#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012884 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012885 xmlCleanupInputCallbacks();
12886#ifdef LIBXML_OUTPUT_ENABLED
12887 xmlCleanupOutputCallbacks();
12888#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012889#ifdef LIBXML_SCHEMAS_ENABLED
12890 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012891 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012892#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012893 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012894 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012895 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012896 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012897 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012898}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012899
12900/************************************************************************
12901 * *
12902 * New set (2.6.0) of simpler and more flexible APIs *
12903 * *
12904 ************************************************************************/
12905
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) and carries no trailing
 * semicolon, so "DICT_FREE(x);" is exactly one statement and can be used
 * safely as the unbraced body of an if/else.
 * @str is expanded several times: do not pass an expression with side
 * effects.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12917
12918/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012919 * xmlCtxtReset:
12920 * @ctxt: an XML parser context
12921 *
12922 * Reset a parser context
12923 */
12924void
12925xmlCtxtReset(xmlParserCtxtPtr ctxt)
12926{
12927 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012928 xmlDictPtr dict;
12929
12930 if (ctxt == NULL)
12931 return;
12932
12933 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012934
12935 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12936 xmlFreeInputStream(input);
12937 }
12938 ctxt->inputNr = 0;
12939 ctxt->input = NULL;
12940
12941 ctxt->spaceNr = 0;
12942 ctxt->spaceTab[0] = -1;
12943 ctxt->space = &ctxt->spaceTab[0];
12944
12945
12946 ctxt->nodeNr = 0;
12947 ctxt->node = NULL;
12948
12949 ctxt->nameNr = 0;
12950 ctxt->name = NULL;
12951
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012952 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012953 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012954 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012955 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012956 DICT_FREE(ctxt->directory);
12957 ctxt->directory = NULL;
12958 DICT_FREE(ctxt->extSubURI);
12959 ctxt->extSubURI = NULL;
12960 DICT_FREE(ctxt->extSubSystem);
12961 ctxt->extSubSystem = NULL;
12962 if (ctxt->myDoc != NULL)
12963 xmlFreeDoc(ctxt->myDoc);
12964 ctxt->myDoc = NULL;
12965
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012966 ctxt->standalone = -1;
12967 ctxt->hasExternalSubset = 0;
12968 ctxt->hasPErefs = 0;
12969 ctxt->html = 0;
12970 ctxt->external = 0;
12971 ctxt->instate = XML_PARSER_START;
12972 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012973
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012974 ctxt->wellFormed = 1;
12975 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012976 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012977 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012978#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012979 ctxt->vctxt.userData = ctxt;
12980 ctxt->vctxt.error = xmlParserValidityError;
12981 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012982#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012983 ctxt->record_info = 0;
12984 ctxt->nbChars = 0;
12985 ctxt->checkIndex = 0;
12986 ctxt->inSubset = 0;
12987 ctxt->errNo = XML_ERR_OK;
12988 ctxt->depth = 0;
12989 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12990 ctxt->catalogs = NULL;
12991 xmlInitNodeInfoSeq(&ctxt->node_seq);
12992
12993 if (ctxt->attsDefault != NULL) {
12994 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12995 ctxt->attsDefault = NULL;
12996 }
12997 if (ctxt->attsSpecial != NULL) {
12998 xmlHashFree(ctxt->attsSpecial, NULL);
12999 ctxt->attsSpecial = NULL;
13000 }
13001
Daniel Veillard4432df22003-09-28 18:58:27 +000013002#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013003 if (ctxt->catalogs != NULL)
13004 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013005#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013006 if (ctxt->lastError.code != XML_ERR_OK)
13007 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008}
13009
13010/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013011 * xmlCtxtResetPush:
13012 * @ctxt: an XML parser context
13013 * @chunk: a pointer to an array of chars
13014 * @size: number of chars in the array
13015 * @filename: an optional file name or URI
13016 * @encoding: the document encoding, or NULL
13017 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013018 * Reset a push parser context
13019 *
13020 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013021 */
13022int
13023xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13024 int size, const char *filename, const char *encoding)
13025{
13026 xmlParserInputPtr inputStream;
13027 xmlParserInputBufferPtr buf;
13028 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13029
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013030 if (ctxt == NULL)
13031 return(1);
13032
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013033 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13034 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13035
13036 buf = xmlAllocParserInputBuffer(enc);
13037 if (buf == NULL)
13038 return(1);
13039
13040 if (ctxt == NULL) {
13041 xmlFreeParserInputBuffer(buf);
13042 return(1);
13043 }
13044
13045 xmlCtxtReset(ctxt);
13046
13047 if (ctxt->pushTab == NULL) {
13048 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13049 sizeof(xmlChar *));
13050 if (ctxt->pushTab == NULL) {
13051 xmlErrMemory(ctxt, NULL);
13052 xmlFreeParserInputBuffer(buf);
13053 return(1);
13054 }
13055 }
13056
13057 if (filename == NULL) {
13058 ctxt->directory = NULL;
13059 } else {
13060 ctxt->directory = xmlParserGetDirectory(filename);
13061 }
13062
13063 inputStream = xmlNewInputStream(ctxt);
13064 if (inputStream == NULL) {
13065 xmlFreeParserInputBuffer(buf);
13066 return(1);
13067 }
13068
13069 if (filename == NULL)
13070 inputStream->filename = NULL;
13071 else
13072 inputStream->filename = (char *)
13073 xmlCanonicPath((const xmlChar *) filename);
13074 inputStream->buf = buf;
13075 inputStream->base = inputStream->buf->buffer->content;
13076 inputStream->cur = inputStream->buf->buffer->content;
13077 inputStream->end =
13078 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13079
13080 inputPush(ctxt, inputStream);
13081
13082 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13083 (ctxt->input->buf != NULL)) {
13084 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13085 int cur = ctxt->input->cur - ctxt->input->base;
13086
13087 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13088
13089 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13090 ctxt->input->cur = ctxt->input->base + cur;
13091 ctxt->input->end =
13092 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13093 use];
13094#ifdef DEBUG_PUSH
13095 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13096#endif
13097 }
13098
13099 if (encoding != NULL) {
13100 xmlCharEncodingHandlerPtr hdlr;
13101
13102 hdlr = xmlFindCharEncodingHandler(encoding);
13103 if (hdlr != NULL) {
13104 xmlSwitchToEncoding(ctxt, hdlr);
13105 } else {
13106 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13107 "Unsupported encoding %s\n", BAD_CAST encoding);
13108 }
13109 } else if (enc != XML_CHAR_ENCODING_NONE) {
13110 xmlSwitchEncoding(ctxt, enc);
13111 }
13112
13113 return(0);
13114}
13115
13116/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013117 * xmlCtxtUseOptions:
13118 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013119 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013120 *
13121 * Applies the options to the parser context
13122 *
13123 * Returns 0 in case of success, the set of unknown or unimplemented options
13124 * in case of error.
13125 */
13126int
13127xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13128{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013129 if (ctxt == NULL)
13130 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013131 if (options & XML_PARSE_RECOVER) {
13132 ctxt->recovery = 1;
13133 options -= XML_PARSE_RECOVER;
13134 } else
13135 ctxt->recovery = 0;
13136 if (options & XML_PARSE_DTDLOAD) {
13137 ctxt->loadsubset = XML_DETECT_IDS;
13138 options -= XML_PARSE_DTDLOAD;
13139 } else
13140 ctxt->loadsubset = 0;
13141 if (options & XML_PARSE_DTDATTR) {
13142 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13143 options -= XML_PARSE_DTDATTR;
13144 }
13145 if (options & XML_PARSE_NOENT) {
13146 ctxt->replaceEntities = 1;
13147 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13148 options -= XML_PARSE_NOENT;
13149 } else
13150 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013151 if (options & XML_PARSE_PEDANTIC) {
13152 ctxt->pedantic = 1;
13153 options -= XML_PARSE_PEDANTIC;
13154 } else
13155 ctxt->pedantic = 0;
13156 if (options & XML_PARSE_NOBLANKS) {
13157 ctxt->keepBlanks = 0;
13158 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13159 options -= XML_PARSE_NOBLANKS;
13160 } else
13161 ctxt->keepBlanks = 1;
13162 if (options & XML_PARSE_DTDVALID) {
13163 ctxt->validate = 1;
13164 if (options & XML_PARSE_NOWARNING)
13165 ctxt->vctxt.warning = NULL;
13166 if (options & XML_PARSE_NOERROR)
13167 ctxt->vctxt.error = NULL;
13168 options -= XML_PARSE_DTDVALID;
13169 } else
13170 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013171 if (options & XML_PARSE_NOWARNING) {
13172 ctxt->sax->warning = NULL;
13173 options -= XML_PARSE_NOWARNING;
13174 }
13175 if (options & XML_PARSE_NOERROR) {
13176 ctxt->sax->error = NULL;
13177 ctxt->sax->fatalError = NULL;
13178 options -= XML_PARSE_NOERROR;
13179 }
Daniel Veillard81273902003-09-30 00:43:48 +000013180#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013181 if (options & XML_PARSE_SAX1) {
13182 ctxt->sax->startElement = xmlSAX2StartElement;
13183 ctxt->sax->endElement = xmlSAX2EndElement;
13184 ctxt->sax->startElementNs = NULL;
13185 ctxt->sax->endElementNs = NULL;
13186 ctxt->sax->initialized = 1;
13187 options -= XML_PARSE_SAX1;
13188 }
Daniel Veillard81273902003-09-30 00:43:48 +000013189#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013190 if (options & XML_PARSE_NODICT) {
13191 ctxt->dictNames = 0;
13192 options -= XML_PARSE_NODICT;
13193 } else {
13194 ctxt->dictNames = 1;
13195 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013196 if (options & XML_PARSE_NOCDATA) {
13197 ctxt->sax->cdataBlock = NULL;
13198 options -= XML_PARSE_NOCDATA;
13199 }
13200 if (options & XML_PARSE_NSCLEAN) {
13201 ctxt->options |= XML_PARSE_NSCLEAN;
13202 options -= XML_PARSE_NSCLEAN;
13203 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013204 if (options & XML_PARSE_NONET) {
13205 ctxt->options |= XML_PARSE_NONET;
13206 options -= XML_PARSE_NONET;
13207 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013208 if (options & XML_PARSE_COMPACT) {
13209 ctxt->options |= XML_PARSE_COMPACT;
13210 options -= XML_PARSE_COMPACT;
13211 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013212 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013213 return (options);
13214}
13215
13216/**
13217 * xmlDoRead:
13218 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013219 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013220 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013221 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013222 * @reuse: keep the context for reuse
13223 *
13224 * Common front-end for the xmlRead functions
13225 *
13226 * Returns the resulting document tree or NULL
13227 */
13228static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013229xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13230 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013231{
13232 xmlDocPtr ret;
13233
13234 xmlCtxtUseOptions(ctxt, options);
13235 if (encoding != NULL) {
13236 xmlCharEncodingHandlerPtr hdlr;
13237
13238 hdlr = xmlFindCharEncodingHandler(encoding);
13239 if (hdlr != NULL)
13240 xmlSwitchToEncoding(ctxt, hdlr);
13241 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013242 if ((URL != NULL) && (ctxt->input != NULL) &&
13243 (ctxt->input->filename == NULL))
13244 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013245 xmlParseDocument(ctxt);
13246 if ((ctxt->wellFormed) || ctxt->recovery)
13247 ret = ctxt->myDoc;
13248 else {
13249 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013250 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013251 xmlFreeDoc(ctxt->myDoc);
13252 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013253 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013254 ctxt->myDoc = NULL;
13255 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013256 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013257 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013258
13259 return (ret);
13260}
13261
13262/**
13263 * xmlReadDoc:
13264 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013265 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013266 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013267 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013268 *
13269 * parse an XML in-memory document and build a tree.
13270 *
13271 * Returns the resulting document tree
13272 */
13273xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013274xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013275{
13276 xmlParserCtxtPtr ctxt;
13277
13278 if (cur == NULL)
13279 return (NULL);
13280
13281 ctxt = xmlCreateDocParserCtxt(cur);
13282 if (ctxt == NULL)
13283 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013284 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013285}
13286
13287/**
13288 * xmlReadFile:
13289 * @filename: a file or URL
13290 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013291 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013292 *
13293 * parse an XML file from the filesystem or the network.
13294 *
13295 * Returns the resulting document tree
13296 */
13297xmlDocPtr
13298xmlReadFile(const char *filename, const char *encoding, int options)
13299{
13300 xmlParserCtxtPtr ctxt;
13301
Daniel Veillard61b93382003-11-03 14:28:31 +000013302 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013303 if (ctxt == NULL)
13304 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013305 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013306}
13307
13308/**
13309 * xmlReadMemory:
13310 * @buffer: a pointer to a char array
13311 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013312 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013313 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013314 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013315 *
13316 * parse an XML in-memory document and build a tree.
13317 *
13318 * Returns the resulting document tree
13319 */
13320xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013321xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013322{
13323 xmlParserCtxtPtr ctxt;
13324
13325 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13326 if (ctxt == NULL)
13327 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013328 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013329}
13330
13331/**
13332 * xmlReadFd:
13333 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013334 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013335 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013336 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013337 *
13338 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013339 * NOTE that the file descriptor will not be closed when the
13340 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013341 *
13342 * Returns the resulting document tree
13343 */
13344xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013345xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013346{
13347 xmlParserCtxtPtr ctxt;
13348 xmlParserInputBufferPtr input;
13349 xmlParserInputPtr stream;
13350
13351 if (fd < 0)
13352 return (NULL);
13353
13354 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13355 if (input == NULL)
13356 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013357 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013358 ctxt = xmlNewParserCtxt();
13359 if (ctxt == NULL) {
13360 xmlFreeParserInputBuffer(input);
13361 return (NULL);
13362 }
13363 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13364 if (stream == NULL) {
13365 xmlFreeParserInputBuffer(input);
13366 xmlFreeParserCtxt(ctxt);
13367 return (NULL);
13368 }
13369 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013370 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013371}
13372
13373/**
13374 * xmlReadIO:
13375 * @ioread: an I/O read function
13376 * @ioclose: an I/O close function
13377 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013378 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013379 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013380 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013381 *
13382 * parse an XML document from I/O functions and source and build a tree.
13383 *
13384 * Returns the resulting document tree
13385 */
13386xmlDocPtr
13387xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013388 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013389{
13390 xmlParserCtxtPtr ctxt;
13391 xmlParserInputBufferPtr input;
13392 xmlParserInputPtr stream;
13393
13394 if (ioread == NULL)
13395 return (NULL);
13396
13397 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13398 XML_CHAR_ENCODING_NONE);
13399 if (input == NULL)
13400 return (NULL);
13401 ctxt = xmlNewParserCtxt();
13402 if (ctxt == NULL) {
13403 xmlFreeParserInputBuffer(input);
13404 return (NULL);
13405 }
13406 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13407 if (stream == NULL) {
13408 xmlFreeParserInputBuffer(input);
13409 xmlFreeParserCtxt(ctxt);
13410 return (NULL);
13411 }
13412 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013413 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013414}
13415
13416/**
13417 * xmlCtxtReadDoc:
13418 * @ctxt: an XML parser context
13419 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013420 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013421 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013422 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013423 *
13424 * parse an XML in-memory document and build a tree.
13425 * This reuses the existing @ctxt parser context
13426 *
13427 * Returns the resulting document tree
13428 */
13429xmlDocPtr
13430xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013431 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013432{
13433 xmlParserInputPtr stream;
13434
13435 if (cur == NULL)
13436 return (NULL);
13437 if (ctxt == NULL)
13438 return (NULL);
13439
13440 xmlCtxtReset(ctxt);
13441
13442 stream = xmlNewStringInputStream(ctxt, cur);
13443 if (stream == NULL) {
13444 return (NULL);
13445 }
13446 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013447 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013448}
13449
13450/**
13451 * xmlCtxtReadFile:
13452 * @ctxt: an XML parser context
13453 * @filename: a file or URL
13454 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013455 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013456 *
13457 * parse an XML file from the filesystem or the network.
13458 * This reuses the existing @ctxt parser context
13459 *
13460 * Returns the resulting document tree
13461 */
13462xmlDocPtr
13463xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13464 const char *encoding, int options)
13465{
13466 xmlParserInputPtr stream;
13467
13468 if (filename == NULL)
13469 return (NULL);
13470 if (ctxt == NULL)
13471 return (NULL);
13472
13473 xmlCtxtReset(ctxt);
13474
Daniel Veillard29614c72004-11-26 10:47:26 +000013475 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013476 if (stream == NULL) {
13477 return (NULL);
13478 }
13479 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013480 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013481}
13482
13483/**
13484 * xmlCtxtReadMemory:
13485 * @ctxt: an XML parser context
13486 * @buffer: a pointer to a char array
13487 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013488 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013489 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013490 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013491 *
13492 * parse an XML in-memory document and build a tree.
13493 * This reuses the existing @ctxt parser context
13494 *
13495 * Returns the resulting document tree
13496 */
13497xmlDocPtr
13498xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013499 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013500{
13501 xmlParserInputBufferPtr input;
13502 xmlParserInputPtr stream;
13503
13504 if (ctxt == NULL)
13505 return (NULL);
13506 if (buffer == NULL)
13507 return (NULL);
13508
13509 xmlCtxtReset(ctxt);
13510
13511 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13512 if (input == NULL) {
13513 return(NULL);
13514 }
13515
13516 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13517 if (stream == NULL) {
13518 xmlFreeParserInputBuffer(input);
13519 return(NULL);
13520 }
13521
13522 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013523 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013524}
13525
13526/**
13527 * xmlCtxtReadFd:
13528 * @ctxt: an XML parser context
13529 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013530 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013531 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013532 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013533 *
13534 * parse an XML from a file descriptor and build a tree.
13535 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013536 * NOTE that the file descriptor will not be closed when the
13537 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013538 *
13539 * Returns the resulting document tree
13540 */
13541xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013542xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13543 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013544{
13545 xmlParserInputBufferPtr input;
13546 xmlParserInputPtr stream;
13547
13548 if (fd < 0)
13549 return (NULL);
13550 if (ctxt == NULL)
13551 return (NULL);
13552
13553 xmlCtxtReset(ctxt);
13554
13555
13556 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13557 if (input == NULL)
13558 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013559 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13561 if (stream == NULL) {
13562 xmlFreeParserInputBuffer(input);
13563 return (NULL);
13564 }
13565 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013566 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013567}
13568
13569/**
13570 * xmlCtxtReadIO:
13571 * @ctxt: an XML parser context
13572 * @ioread: an I/O read function
13573 * @ioclose: an I/O close function
13574 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013575 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013576 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013577 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013578 *
13579 * parse an XML document from I/O functions and source and build a tree.
13580 * This reuses the existing @ctxt parser context
13581 *
13582 * Returns the resulting document tree
13583 */
13584xmlDocPtr
13585xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13586 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013587 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013588 const char *encoding, int options)
13589{
13590 xmlParserInputBufferPtr input;
13591 xmlParserInputPtr stream;
13592
13593 if (ioread == NULL)
13594 return (NULL);
13595 if (ctxt == NULL)
13596 return (NULL);
13597
13598 xmlCtxtReset(ctxt);
13599
13600 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13601 XML_CHAR_ENCODING_NONE);
13602 if (input == NULL)
13603 return (NULL);
13604 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13605 if (stream == NULL) {
13606 xmlFreeParserInputBuffer(input);
13607 return (NULL);
13608 }
13609 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013610 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013611}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013612
13613#define bottom_parser
13614#include "elfgcchack.h"