blob: 32927eba3bedde08e1e57cde90d1cbd474c44e55 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000083/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000084 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000085 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * pointer slots are read-only, so the array itself is declared const.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the library was compiled with the
623 * feature, or zero (0) if the feature is absent or an unknown
624 * feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context in order to use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser only to
 *           compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
Daniel Veillardfdc91562002-07-01 21:52:03 +00001492#define RAW (*ctxt->input->cur)
1493#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +00001494#define NXT(val) ctxt->input->cur[(val)]
1495#define CUR_PTR ctxt->input->cur
1496
/*
 * CMPn(s, c1, ..., cn): evaluates to non-zero when the first n bytes found
 * at `s`, read as unsigned char, are equal to c1..cn in that order.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (pointer arithmetic for `s`, a conditional expression for a
 * character, ...) keep their intended grouping next to `==` and `&&`.
 *
 * `s` is still evaluated once per compared byte, so it must be free of
 * side effects. Comparison short-circuits on the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): advance the current input by `val` xmlChars, keeping the
 * character counter and the column in step (the line number is not
 * touched). Afterwards, hand a leading '%' to xmlParserHandlePEReference()
 * and, when the input reads as exhausted and cannot be grown, pop it.
 * `val` is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	xmlPopInput(ctxt);						\
  } while (0)
1522
/*
 * SKIPL(val): advance the current input by `val` xmlChars one at a time,
 * bumping the line number (and resetting the column to 1) on each '\n'
 * and the column otherwise. Afterwards, hand a leading '%' to
 * xmlParserHandlePEReference() and, when the input reads as exhausted and
 * cannot be grown, pop it.
 *
 * `val` is parenthesized so that an expression argument keeps its grouping
 * inside the loop condition; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1537
/*
 * SHRINK: outside progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie between the buffer base and the current
 * position while fewer than 2 * INPUT_CHUNK bytes remain before the end.
 *
 * The expansion is a bare `if` statement that already carries its own
 * terminating ';'. Do not use it as the unbraced body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
/*
 * GROW: outside progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK bytes remain between the current position and the end of
 * the input buffer.
 *
 * The expansion is a bare `if` statement that already carries its own
 * terminating ';' (some call sites in this file rely on that and write
 * `GROW` with no semicolon). Do not use it as the unbraced body of an
 * if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* Skip blanks at the current position; evaluates to the value returned by
 * xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and the
 * character counter but never the line number, then try to grow the input
 * when the new current byte reads as 0.
 * The expansion is a brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1572
/*
 * NEXTL(l): step over the current character, which occupies `l` xmlChars.
 * A '\n' at the current position bumps the line number and resets the
 * column to 1; anything else bumps the column by one. After the move, a
 * leading '%' is handed to xmlParserHandlePEReference().
 *
 * `l` is parenthesized so that an expression argument keeps its grouping
 * in the pointer increment.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)
1580
/* Decode the character at the current input position; `l` (an lvalue)
 * receives the length reported by xmlCurrentChar(). */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same, but decoding from the string `s` through xmlStringCurrentChar(). */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character `v` to buffer `b` at index `i` and
 * advance `i`. When `l` is 1 a single xmlChar is stored; otherwise the
 * encoding is delegated to xmlCopyCharMultiByte() and `i` advances by its
 * return value. No bounds checking is done here.
 *
 * Parameters are parenthesized in the expansion. The expansion remains an
 * unbraced if/else without trailing ';' because existing call sites supply
 * the semicolon; keep calls inside their own braces.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the companion variable <buffer>_size and
 * reallocates <buffer> to that many xmlChars. The enclosing function must
 * therefore declare <buffer>_size and provide a `mem_error` label, which
 * is jumped to when the reallocation fails. On that path <buffer> still
 * points to the old, un-freed block, while <buffer>_size has already been
 * doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002499 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002500 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002512 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002513 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002515 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002516 }
Owen Taylor3473f882001-02-23 17:55:21 +00002517 len = 0;
2518
Daniel Veillardbb284f42002-10-16 18:02:47 +00002519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002532 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002533 }
2534 }
2535 cur++;
2536
Owen Taylor3473f882001-02-23 17:55:21 +00002537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002550 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 xmlChar *tmp;
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002560 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 return(NULL);
2565 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002566 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
2584/************************************************************************
2585 * *
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2588 * *
2589 ************************************************************************/
2590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002593 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002594
Owen Taylor3473f882001-02-23 17:55:21 +00002595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002606 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002612xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002613 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002614 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002615 int count = 0;
2616
2617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002633 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002636 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002641 return(ret);
2642 }
2643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002670 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 }
William M. Brack76e95df2003-10-18 16:20:14 +00002672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 /* success */
2674 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002675 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002682 }
2683 return ret;
2684}
2685
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002686static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002713 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 NEXTL(l);
2715 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Daniel Veillard96688262005-08-23 18:14:12 +00002717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002734 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002736 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * is updated to the current location in the string.
2738 */
2739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002740static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002748 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002749 (c != ':')) {
2750 return(NULL);
2751 }
2752
William M. Brack871611b2003-10-18 04:53:14 +00002753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002776 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002785 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 return(NULL);
2790 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002791 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
William M. Brack871611b2003-10-18 04:53:14 +00002829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 xmlChar *tmp;
2866
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002869 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 return(NULL);
2874 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002875 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002922 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
William M. Brack871611b2003-10-18 04:53:14 +00002944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 xmlChar *tmp;
2948
Owen Taylor3473f882001-02-23 17:55:21 +00002949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002990 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002991 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 }
2997 if (name != NULL)
2998 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002999 if (*cur == 0)
3000 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
 * @attlen: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003038 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003042static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 int len = 0;
3047 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003071
3072 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003073 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003074 */
3075 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 while ((NXT(0) != limit) && /* checked */
3077 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003079 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003207 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003213 } else
3214 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003215 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003217
3218mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003219 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003221}
3222
3223/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003224 * xmlParseAttValue:
3225 * @ctxt: an XML parser context
3226 *
3227 * parse a value for an attribute
3228 * Note: the parser won't do substitution of entities here, this
3229 * will be handled later in xmlStringGetNodeList
3230 *
3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232 * "'" ([^<&'] | Reference)* "'"
3233 *
3234 * 3.3.3 Attribute-Value Normalization:
3235 * Before the value of an attribute is passed to the application or
3236 * checked for validity, the XML processor must normalize it as follows:
3237 * - a character reference is processed by appending the referenced
3238 * character to the attribute value
3239 * - an entity reference is processed by recursively processing the
3240 * replacement text of the entity
3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242 * appending #x20 to the normalized value, except that only a single
3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3244 * parsed entity or the literal entity value of an internal parsed entity
3245 * - other characters are processed by appending them to the normalized value
3246 * If the declared value is not CDATA, then the XML processor must further
3247 * process the normalized attribute value by discarding any leading and
3248 * trailing space (#x20) characters, and by replacing sequences of space
3249 * (#x20) characters by a single space (#x20) character.
3250 * All attributes for which no declaration has been read should be treated
3251 * by a non-validating parser as if declared CDATA.
3252 *
3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3254 */
3255
3256
3257xmlChar *
3258xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003261}
3262
3263/**
Owen Taylor3473f882001-02-23 17:55:21 +00003264 * xmlParseSystemLiteral:
3265 * @ctxt: an XML parser context
3266 *
3267 * parse an XML Literal
3268 *
3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3270 *
3271 * Returns the SystemLiteral parsed or NULL
3272 */
3273
3274xmlChar *
3275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276 xmlChar *buf = NULL;
3277 int len = 0;
3278 int size = XML_PARSER_BUFFER_SIZE;
3279 int cur, l;
3280 xmlChar stop;
3281 int state = ctxt->instate;
3282 int count = 0;
3283
3284 SHRINK;
3285 if (RAW == '"') {
3286 NEXT;
3287 stop = '"';
3288 } else if (RAW == '\'') {
3289 NEXT;
3290 stop = '\'';
3291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003293 return(NULL);
3294 }
3295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003297 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003299 return(NULL);
3300 }
3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3302 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003305 xmlChar *tmp;
3306
Owen Taylor3473f882001-02-23 17:55:21 +00003307 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3309 if (tmp == NULL) {
3310 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003312 ctxt->instate = (xmlParserInputState) state;
3313 return(NULL);
3314 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003315 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003316 }
3317 count++;
3318 if (count > 50) {
3319 GROW;
3320 count = 0;
3321 }
3322 COPY_BUF(l,buf,len,cur);
3323 NEXTL(l);
3324 cur = CUR_CHAR(l);
3325 if (cur == 0) {
3326 GROW;
3327 SHRINK;
3328 cur = CUR_CHAR(l);
3329 }
3330 }
3331 buf[len] = 0;
3332 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003333 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 } else {
3336 NEXT;
3337 }
3338 return(buf);
3339}
3340
3341/**
3342 * xmlParsePubidLiteral:
3343 * @ctxt: an XML parser context
3344 *
3345 * parse an XML public literal
3346 *
3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3348 *
3349 * Returns the PubidLiteral parsed or NULL.
3350 */
3351
3352xmlChar *
3353xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354 xmlChar *buf = NULL;
3355 int len = 0;
3356 int size = XML_PARSER_BUFFER_SIZE;
3357 xmlChar cur;
3358 xmlChar stop;
3359 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003360 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003361
3362 SHRINK;
3363 if (RAW == '"') {
3364 NEXT;
3365 stop = '"';
3366 } else if (RAW == '\'') {
3367 NEXT;
3368 stop = '\'';
3369 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003371 return(NULL);
3372 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003374 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 xmlChar *tmp;
3383
Owen Taylor3473f882001-02-23 17:55:21 +00003384 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3386 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003388 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003389 return(NULL);
3390 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003391 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 }
3393 buf[len++] = cur;
3394 count++;
3395 if (count > 50) {
3396 GROW;
3397 count = 0;
3398 }
3399 NEXT;
3400 cur = CUR;
3401 if (cur == 0) {
3402 GROW;
3403 SHRINK;
3404 cur = CUR;
3405 }
3406 }
3407 buf[len] = 0;
3408 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 } else {
3411 NEXT;
3412 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003413 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003414 return(buf);
3415}
3416
Daniel Veillard48b2f892001-02-25 16:11:03 +00003417void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003418
/*
 * Lookup table driving the inner loop of the character data fast path.
 * Entry [b] is non-zero (and equal to b) when byte b can be skipped over
 * as plain ASCII text, and 0 when the fast loop has to stop on it: control
 * characters other than TAB (CR and LF are dealt with separately by the
 * caller), '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only TAB (0x09) is accepted */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0x09, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never accepted */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
3456
Owen Taylor3473f882001-02-23 17:55:21 +00003457/**
3458 * xmlParseCharData:
3459 * @ctxt: an XML parser context
3460 * @cdata: int indicating whether we are within a CDATA section
3461 *
3462 * parse a CharData section.
3463 * if we are within a CDATA section ']]>' marks an end of section.
3464 *
3465 * The right angle bracket (>) may be represented using the string "&gt;",
3466 * and must, for compatibility, be escaped using "&gt;" or a character
3467 * reference when it appears in the string "]]>" in content, when that
3468 * string is not marking the end of a CDATA section.
3469 *
3470 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3471 */
3472
3473void
3474xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003475 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003477 int line = ctxt->input->line;
3478 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003479 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003480
3481 SHRINK;
3482 GROW;
3483 /*
3484 * Accelerated common case where input don't need to be
3485 * modified before passing it to the handler.
3486 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003487 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003488 in = ctxt->input->cur;
3489 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003490get_more_space:
3491 while (*in == 0x20) in++;
3492 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003493 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003494 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003495 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003496 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003497 goto get_more_space;
3498 }
3499 if (*in == '<') {
3500 nbchar = in - ctxt->input->cur;
3501 if (nbchar > 0) {
3502 const xmlChar *tmp = ctxt->input->cur;
3503 ctxt->input->cur = in;
3504
Daniel Veillard34099b42004-11-04 17:34:35 +00003505 if ((ctxt->sax != NULL) &&
3506 (ctxt->sax->ignorableWhitespace !=
3507 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003508 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003509 if (ctxt->sax->ignorableWhitespace != NULL)
3510 ctxt->sax->ignorableWhitespace(ctxt->userData,
3511 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003512 } else {
3513 if (ctxt->sax->characters != NULL)
3514 ctxt->sax->characters(ctxt->userData,
3515 tmp, nbchar);
3516 if (*ctxt->space == -1)
3517 *ctxt->space = -2;
3518 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 } else if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003521 ctxt->sax->characters(ctxt->userData,
3522 tmp, nbchar);
3523 }
3524 }
3525 return;
3526 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003527
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003528get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003529 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003530 while (test_char_data[*in]) {
3531 in++;
3532 ccol++;
3533 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003534 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003535 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003536 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003537 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003538 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003539 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003540 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003541 }
3542 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003543 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003544 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003545 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003546 return;
3547 }
3548 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003549 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003550 goto get_more;
3551 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003552 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003553 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003554 if ((ctxt->sax != NULL) &&
3555 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003556 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003557 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003558 const xmlChar *tmp = ctxt->input->cur;
3559 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003560
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003561 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003562 if (ctxt->sax->ignorableWhitespace != NULL)
3563 ctxt->sax->ignorableWhitespace(ctxt->userData,
3564 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003565 } else {
3566 if (ctxt->sax->characters != NULL)
3567 ctxt->sax->characters(ctxt->userData,
3568 tmp, nbchar);
3569 if (*ctxt->space == -1)
3570 *ctxt->space = -2;
3571 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003572 line = ctxt->input->line;
3573 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003574 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003575 if (ctxt->sax->characters != NULL)
3576 ctxt->sax->characters(ctxt->userData,
3577 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003578 line = ctxt->input->line;
3579 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003580 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003581 }
3582 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003583 if (*in == 0xD) {
3584 in++;
3585 if (*in == 0xA) {
3586 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003587 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003588 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003589 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003590 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003591 in--;
3592 }
3593 if (*in == '<') {
3594 return;
3595 }
3596 if (*in == '&') {
3597 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003598 }
3599 SHRINK;
3600 GROW;
3601 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003603 nbchar = 0;
3604 }
Daniel Veillard50582112001-03-26 22:52:16 +00003605 ctxt->input->line = line;
3606 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003607 xmlParseCharDataComplex(ctxt, cdata);
3608}
3609
Daniel Veillard01c13b52002-12-10 15:19:08 +00003610/**
3611 * xmlParseCharDataComplex:
3612 * @ctxt: an XML parser context
3613 * @cdata: int indicating whether we are within a CDATA section
3614 *
3615 * parse a CharData section.this is the fallback function
3616 * of xmlParseCharData() when the parsing requires handling
3617 * of non-ASCII characters.
3618 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003619void
3620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3622 int nbchar = 0;
3623 int cur, l;
3624 int count = 0;
3625
3626 SHRINK;
3627 GROW;
3628 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003629 while ((cur != '<') && /* checked */
3630 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if ((cur == ']') && (NXT(1) == ']') &&
3633 (NXT(2) == '>')) {
3634 if (cdata) break;
3635 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003637 }
3638 }
3639 COPY_BUF(l,buf,nbchar,cur);
3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003641 buf[nbchar] = 0;
3642
Owen Taylor3473f882001-02-23 17:55:21 +00003643 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003644 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003645 */
3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003647 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (ctxt->sax->ignorableWhitespace != NULL)
3649 ctxt->sax->ignorableWhitespace(ctxt->userData,
3650 buf, nbchar);
3651 } else {
3652 if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003654 if ((ctxt->sax->characters !=
3655 ctxt->sax->ignorableWhitespace) &&
3656 (*ctxt->space == -1))
3657 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 }
3660 nbchar = 0;
3661 }
3662 count++;
3663 if (count > 50) {
3664 GROW;
3665 count = 0;
3666 }
3667 NEXTL(l);
3668 cur = CUR_CHAR(l);
3669 }
3670 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003671 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003673 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003674 */
3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003676 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003677 if (ctxt->sax->ignorableWhitespace != NULL)
3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683 (*ctxt->space == -1))
3684 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 }
3687 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003688 if ((cur != 0) && (!IS_CHAR(cur))) {
3689 /* Generate the error and skip the offending character */
3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691 "PCDATA invalid Char value %d\n",
3692 cur);
3693 NEXTL(l);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695}
3696
3697/**
3698 * xmlParseExternalID:
3699 * @ctxt: an XML parser context
3700 * @publicID: a xmlChar** receiving PubidLiteral
3701 * @strict: indicate whether we should restrict parsing to only
3702 * production [75], see NOTE below
3703 *
3704 * Parse an External ID or a Public ID
3705 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003706 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003707 * 'PUBLIC' S PubidLiteral S SystemLiteral
3708 *
3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3711 *
3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3713 *
3714 * Returns the function returns SystemLiteral and in the second
3715 * case publicID receives PubidLiteral, is strict is off
3716 * it is possible to return NULL and have publicID set.
3717 */
3718
3719xmlChar *
3720xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721 xmlChar *URI = NULL;
3722
3723 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003724
3725 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003727 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 SKIP_BLANKS;
3733 URI = xmlParseSystemLiteral(ctxt);
3734 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003741 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 *publicID = xmlParsePubidLiteral(ctxt);
3745 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 if (strict) {
3749 /*
3750 * We don't handle [83] so "S SystemLiteral" is required.
3751 */
William M. Brack76e95df2003-10-18 16:20:14 +00003752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003754 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 } else {
3757 /*
3758 * We handle [83] so we return immediately, if
3759 * "S SystemLiteral" is not detected. From a purely parsing
3760 * point of view that's a nice mess.
3761 */
3762 const xmlChar *ptr;
3763 GROW;
3764
3765 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767
William M. Brack76e95df2003-10-18 16:20:14 +00003768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3770 }
3771 SKIP_BLANKS;
3772 URI = xmlParseSystemLiteral(ctxt);
3773 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 }
3777 return(URI);
3778}
3779
3780/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003782 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003783 * @buf: the already parsed part of the buffer
3784 * @len: number of bytes filles in the buffer
3785 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003786 *
3787 * Skip an XML (SGML) comment <!-- .... -->
3788 * The spec says that "For compatibility, the string "--" (double-hyphen)
3789 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003790 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003791 *
3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3793 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003794static void
3795xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 int q, ql;
3797 int r, rl;
3798 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 xmlParserInputPtr input = ctxt->input;
3800 int count = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003803 len = 0;
3804 size = XML_PARSER_BUFFER_SIZE;
3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3806 if (buf == NULL) {
3807 xmlErrMemory(ctxt, NULL);
3808 return;
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
3811 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003812 if (q == 0)
3813 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003814 NEXTL(ql);
3815 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003816 if (r == 0)
3817 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003818 NEXTL(rl);
3819 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003820 if (cur == 0)
3821 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003822 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003823 ((cur != '>') ||
3824 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003825 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003826 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003827 }
3828 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003829 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003830 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003831 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3832 if (new_buf == NULL) {
3833 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003834 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003835 return;
3836 }
William M. Bracka3215c72004-07-31 16:24:01 +00003837 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 }
3839 COPY_BUF(ql,buf,len,q);
3840 q = r;
3841 ql = rl;
3842 r = cur;
3843 rl = l;
3844
3845 count++;
3846 if (count > 50) {
3847 GROW;
3848 count = 0;
3849 }
3850 NEXTL(l);
3851 cur = CUR_CHAR(l);
3852 if (cur == 0) {
3853 SHRINK;
3854 GROW;
3855 cur = CUR_CHAR(l);
3856 }
3857 }
3858 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003859 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003860 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003861 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003862 xmlFree(buf);
3863 } else {
3864 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3866 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003867 }
3868 NEXT;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3870 (!ctxt->disableSAX))
3871 ctxt->sax->comment(ctxt->userData, buf);
3872 xmlFree(buf);
3873 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003874 return;
3875not_terminated:
3876 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3877 "Comment not terminated\n", NULL);
3878 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003879}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003880/**
3881 * xmlParseComment:
3882 * @ctxt: an XML parser context
3883 *
3884 * Skip an XML (SGML) comment <!-- .... -->
3885 * The spec says that "For compatibility, the string "--" (double-hyphen)
3886 * must not occur within comments. "
3887 *
3888 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3889 */
3890void
3891xmlParseComment(xmlParserCtxtPtr ctxt) {
3892 xmlChar *buf = NULL;
3893 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003894 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003895 xmlParserInputState state;
3896 const xmlChar *in;
3897 int nbchar = 0, ccol;
3898
3899 /*
3900 * Check that there is a comment right here.
3901 */
3902 if ((RAW != '<') || (NXT(1) != '!') ||
3903 (NXT(2) != '-') || (NXT(3) != '-')) return;
3904
3905 state = ctxt->instate;
3906 ctxt->instate = XML_PARSER_COMMENT;
3907 SKIP(4);
3908 SHRINK;
3909 GROW;
3910
3911 /*
3912 * Accelerated common case where input don't need to be
3913 * modified before passing it to the handler.
3914 */
3915 in = ctxt->input->cur;
3916 do {
3917 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003918 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003919 ctxt->input->line++; ctxt->input->col = 1;
3920 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003921 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003922 }
3923get_more:
3924 ccol = ctxt->input->col;
3925 while (((*in > '-') && (*in <= 0x7F)) ||
3926 ((*in >= 0x20) && (*in < '-')) ||
3927 (*in == 0x09)) {
3928 in++;
3929 ccol++;
3930 }
3931 ctxt->input->col = ccol;
3932 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003933 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003934 ctxt->input->line++; ctxt->input->col = 1;
3935 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003936 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003937 goto get_more;
3938 }
3939 nbchar = in - ctxt->input->cur;
3940 /*
3941 * save current set of data
3942 */
3943 if (nbchar > 0) {
3944 if ((ctxt->sax != NULL) &&
3945 (ctxt->sax->comment != NULL)) {
3946 if (buf == NULL) {
3947 if ((*in == '-') && (in[1] == '-'))
3948 size = nbchar + 1;
3949 else
3950 size = XML_PARSER_BUFFER_SIZE + nbchar;
3951 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3952 if (buf == NULL) {
3953 xmlErrMemory(ctxt, NULL);
3954 ctxt->instate = state;
3955 return;
3956 }
3957 len = 0;
3958 } else if (len + nbchar + 1 >= size) {
3959 xmlChar *new_buf;
3960 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3961 new_buf = (xmlChar *) xmlRealloc(buf,
3962 size * sizeof(xmlChar));
3963 if (new_buf == NULL) {
3964 xmlFree (buf);
3965 xmlErrMemory(ctxt, NULL);
3966 ctxt->instate = state;
3967 return;
3968 }
3969 buf = new_buf;
3970 }
3971 memcpy(&buf[len], ctxt->input->cur, nbchar);
3972 len += nbchar;
3973 buf[len] = 0;
3974 }
3975 }
3976 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003977 if (*in == 0xA) {
3978 in++;
3979 ctxt->input->line++; ctxt->input->col = 1;
3980 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003981 if (*in == 0xD) {
3982 in++;
3983 if (*in == 0xA) {
3984 ctxt->input->cur = in;
3985 in++;
3986 ctxt->input->line++; ctxt->input->col = 1;
3987 continue; /* while */
3988 }
3989 in--;
3990 }
3991 SHRINK;
3992 GROW;
3993 in = ctxt->input->cur;
3994 if (*in == '-') {
3995 if (in[1] == '-') {
3996 if (in[2] == '>') {
3997 SKIP(3);
3998 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3999 (!ctxt->disableSAX)) {
4000 if (buf != NULL)
4001 ctxt->sax->comment(ctxt->userData, buf);
4002 else
4003 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4004 }
4005 if (buf != NULL)
4006 xmlFree(buf);
4007 ctxt->instate = state;
4008 return;
4009 }
4010 if (buf != NULL)
4011 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4012 "Comment not terminated \n<!--%.50s\n",
4013 buf);
4014 else
4015 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4016 "Comment not terminated \n", NULL);
4017 in++;
4018 ctxt->input->col++;
4019 }
4020 in++;
4021 ctxt->input->col++;
4022 goto get_more;
4023 }
4024 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4025 xmlParseCommentComplex(ctxt, buf, len, size);
4026 ctxt->instate = state;
4027 return;
4028}
4029
Owen Taylor3473f882001-02-23 17:55:21 +00004030
4031/**
4032 * xmlParsePITarget:
4033 * @ctxt: an XML parser context
4034 *
4035 * parse the name of a PI
4036 *
4037 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4038 *
4039 * Returns the PITarget name or NULL
4040 */
4041
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004042const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004043xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004044 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004045
4046 name = xmlParseName(ctxt);
4047 if ((name != NULL) &&
4048 ((name[0] == 'x') || (name[0] == 'X')) &&
4049 ((name[1] == 'm') || (name[1] == 'M')) &&
4050 ((name[2] == 'l') || (name[2] == 'L'))) {
4051 int i;
4052 if ((name[0] == 'x') && (name[1] == 'm') &&
4053 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004054 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004055 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004056 return(name);
4057 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004058 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004059 return(name);
4060 }
4061 for (i = 0;;i++) {
4062 if (xmlW3CPIs[i] == NULL) break;
4063 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4064 return(name);
4065 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004066 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4067 "xmlParsePITarget: invalid name prefix 'xml'\n",
4068 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
4070 return(name);
4071}
4072
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * i.e. the pseudo-attribute catalog="URL" (single or double quoted)
 * optionally surrounded by blanks, and add URL to the catalogs of the
 * parsing context so that it can be used if there is a resolution need
 * further down in the document.
 * Syntax errors are reported as a XML_WAR_CATALOG_PI warning, except a
 * missing '=' after the name which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;

    /* '=' sign; its absence is not reported */
    if (*p != '=')
        return;
    for (p++; IS_BLANK_CH(*p); p++)
        ;

    /* quoted value */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    start = ++p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4134
Owen Taylor3473f882001-02-23 17:55:21 +00004135/**
4136 * xmlParsePI:
4137 * @ctxt: an XML parser context
4138 *
4139 * parse an XML Processing Instruction.
4140 *
4141 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4142 *
4143 * The processing is transfered to SAX once parsed.
4144 */
4145
4146void
4147xmlParsePI(xmlParserCtxtPtr ctxt) {
4148 xmlChar *buf = NULL;
4149 int len = 0;
4150 int size = XML_PARSER_BUFFER_SIZE;
4151 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004152 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004153 xmlParserInputState state;
4154 int count = 0;
4155
4156 if ((RAW == '<') && (NXT(1) == '?')) {
4157 xmlParserInputPtr input = ctxt->input;
4158 state = ctxt->instate;
4159 ctxt->instate = XML_PARSER_PI;
4160 /*
4161 * this is a Processing Instruction.
4162 */
4163 SKIP(2);
4164 SHRINK;
4165
4166 /*
4167 * Parse the target name and check for special support like
4168 * namespace.
4169 */
4170 target = xmlParsePITarget(ctxt);
4171 if (target != NULL) {
4172 if ((RAW == '?') && (NXT(1) == '>')) {
4173 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4175 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004176 }
4177 SKIP(2);
4178
4179 /*
4180 * SAX: PI detected.
4181 */
4182 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4183 (ctxt->sax->processingInstruction != NULL))
4184 ctxt->sax->processingInstruction(ctxt->userData,
4185 target, NULL);
4186 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004187 return;
4188 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004189 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004190 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004191 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004192 ctxt->instate = state;
4193 return;
4194 }
4195 cur = CUR;
4196 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004197 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4198 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
4200 SKIP_BLANKS;
4201 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004202 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004203 ((cur != '?') || (NXT(1) != '>'))) {
4204 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004205 xmlChar *tmp;
4206
Owen Taylor3473f882001-02-23 17:55:21 +00004207 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004208 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4209 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004210 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004211 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 ctxt->instate = state;
4213 return;
4214 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004215 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 count++;
4218 if (count > 50) {
4219 GROW;
4220 count = 0;
4221 }
4222 COPY_BUF(l,buf,len,cur);
4223 NEXTL(l);
4224 cur = CUR_CHAR(l);
4225 if (cur == 0) {
4226 SHRINK;
4227 GROW;
4228 cur = CUR_CHAR(l);
4229 }
4230 }
4231 buf[len] = 0;
4232 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004233 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4234 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004235 } else {
4236 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4238 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004239 }
4240 SKIP(2);
4241
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004242#ifdef LIBXML_CATALOG_ENABLED
4243 if (((state == XML_PARSER_MISC) ||
4244 (state == XML_PARSER_START)) &&
4245 (xmlStrEqual(target, XML_CATALOG_PI))) {
4246 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4247 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4248 (allow == XML_CATA_ALLOW_ALL))
4249 xmlParseCatalogPI(ctxt, buf);
4250 }
4251#endif
4252
4253
Owen Taylor3473f882001-02-23 17:55:21 +00004254 /*
4255 * SAX: PI detected.
4256 */
4257 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->processingInstruction != NULL))
4259 ctxt->sax->processingInstruction(ctxt->userData,
4260 target, buf);
4261 }
4262 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004265 }
4266 ctxt->instate = state;
4267 }
4268}
4269
4270/**
4271 * xmlParseNotationDecl:
4272 * @ctxt: an XML parser context
4273 *
4274 * parse a notation declaration
4275 *
4276 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4277 *
4278 * Hence there is actually 3 choices:
4279 * 'PUBLIC' S PubidLiteral
4280 * 'PUBLIC' S PubidLiteral S SystemLiteral
4281 * and 'SYSTEM' S SystemLiteral
4282 *
4283 * See the NOTE on xmlParseExternalID().
4284 */
4285
4286void
4287xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004288 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004289 xmlChar *Pubid;
4290 xmlChar *Systemid;
4291
Daniel Veillarda07050d2003-10-19 14:46:32 +00004292 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004293 xmlParserInputPtr input = ctxt->input;
4294 SHRINK;
4295 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004296 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4298 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004299 return;
4300 }
4301 SKIP_BLANKS;
4302
Daniel Veillard76d66f42001-05-16 21:05:17 +00004303 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004304 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004305 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 return;
4307 }
William M. Brack76e95df2003-10-18 16:20:14 +00004308 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004310 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004311 return;
4312 }
4313 SKIP_BLANKS;
4314
4315 /*
4316 * Parse the IDs.
4317 */
4318 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4319 SKIP_BLANKS;
4320
4321 if (RAW == '>') {
4322 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4324 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326 NEXT;
4327 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4328 (ctxt->sax->notationDecl != NULL))
4329 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4330 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if (Systemid != NULL) xmlFree(Systemid);
4334 if (Pubid != NULL) xmlFree(Pubid);
4335 }
4336}
4337
4338/**
4339 * xmlParseEntityDecl:
4340 * @ctxt: an XML parser context
4341 *
4342 * parse <!ENTITY declarations
4343 *
4344 * [70] EntityDecl ::= GEDecl | PEDecl
4345 *
4346 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4347 *
4348 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4349 *
4350 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4351 *
4352 * [74] PEDef ::= EntityValue | ExternalID
4353 *
4354 * [76] NDataDecl ::= S 'NDATA' S Name
4355 *
4356 * [ VC: Notation Declared ]
4357 * The Name must match the declared name of a notation.
4358 */
4359
4360void
4361xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004362 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 xmlChar *value = NULL;
4364 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004365 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 int isParameter = 0;
4367 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004368 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004369
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004371 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004372 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 SHRINK;
4374 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004375 skipped = SKIP_BLANKS;
4376 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004377 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4378 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004379 }
Owen Taylor3473f882001-02-23 17:55:21 +00004380
4381 if (RAW == '%') {
4382 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004383 skipped = SKIP_BLANKS;
4384 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4386 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004387 }
Owen Taylor3473f882001-02-23 17:55:21 +00004388 isParameter = 1;
4389 }
4390
Daniel Veillard76d66f42001-05-16 21:05:17 +00004391 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004392 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004393 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4394 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004395 return;
4396 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004397 skipped = SKIP_BLANKS;
4398 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004399 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4400 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004401 }
Owen Taylor3473f882001-02-23 17:55:21 +00004402
Daniel Veillardf5582f12002-06-11 10:08:16 +00004403 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004404 /*
4405 * handle the various case of definitions...
4406 */
4407 if (isParameter) {
4408 if ((RAW == '"') || (RAW == '\'')) {
4409 value = xmlParseEntityValue(ctxt, &orig);
4410 if (value) {
4411 if ((ctxt->sax != NULL) &&
4412 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4413 ctxt->sax->entityDecl(ctxt->userData, name,
4414 XML_INTERNAL_PARAMETER_ENTITY,
4415 NULL, NULL, value);
4416 }
4417 } else {
4418 URI = xmlParseExternalID(ctxt, &literal, 1);
4419 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004420 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004421 }
4422 if (URI) {
4423 xmlURIPtr uri;
4424
4425 uri = xmlParseURI((const char *) URI);
4426 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004427 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4428 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004429 /*
4430 * This really ought to be a well formedness error
4431 * but the XML Core WG decided otherwise c.f. issue
4432 * E26 of the XML erratas.
4433 */
Owen Taylor3473f882001-02-23 17:55:21 +00004434 } else {
4435 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004436 /*
4437 * Okay this is foolish to block those but not
4438 * invalid URIs.
4439 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004440 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004441 } else {
4442 if ((ctxt->sax != NULL) &&
4443 (!ctxt->disableSAX) &&
4444 (ctxt->sax->entityDecl != NULL))
4445 ctxt->sax->entityDecl(ctxt->userData, name,
4446 XML_EXTERNAL_PARAMETER_ENTITY,
4447 literal, URI, NULL);
4448 }
4449 xmlFreeURI(uri);
4450 }
4451 }
4452 }
4453 } else {
4454 if ((RAW == '"') || (RAW == '\'')) {
4455 value = xmlParseEntityValue(ctxt, &orig);
4456 if ((ctxt->sax != NULL) &&
4457 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4458 ctxt->sax->entityDecl(ctxt->userData, name,
4459 XML_INTERNAL_GENERAL_ENTITY,
4460 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004461 /*
4462 * For expat compatibility in SAX mode.
4463 */
4464 if ((ctxt->myDoc == NULL) ||
4465 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4466 if (ctxt->myDoc == NULL) {
4467 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4468 }
4469 if (ctxt->myDoc->intSubset == NULL)
4470 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4471 BAD_CAST "fake", NULL, NULL);
4472
Daniel Veillard1af9a412003-08-20 22:54:39 +00004473 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4474 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004475 }
Owen Taylor3473f882001-02-23 17:55:21 +00004476 } else {
4477 URI = xmlParseExternalID(ctxt, &literal, 1);
4478 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004480 }
4481 if (URI) {
4482 xmlURIPtr uri;
4483
4484 uri = xmlParseURI((const char *)URI);
4485 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4487 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004488 /*
4489 * This really ought to be a well formedness error
4490 * but the XML Core WG decided otherwise c.f. issue
4491 * E26 of the XML erratas.
4492 */
Owen Taylor3473f882001-02-23 17:55:21 +00004493 } else {
4494 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004495 /*
4496 * Okay this is foolish to block those but not
4497 * invalid URIs.
4498 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004500 }
4501 xmlFreeURI(uri);
4502 }
4503 }
William M. Brack76e95df2003-10-18 16:20:14 +00004504 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4506 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004507 }
4508 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004509 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004510 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004511 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4513 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004514 }
4515 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004516 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004517 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4518 (ctxt->sax->unparsedEntityDecl != NULL))
4519 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4520 literal, URI, ndata);
4521 } else {
4522 if ((ctxt->sax != NULL) &&
4523 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4524 ctxt->sax->entityDecl(ctxt->userData, name,
4525 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4526 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004527 /*
4528 * For expat compatibility in SAX mode.
4529 * assuming the entity repalcement was asked for
4530 */
4531 if ((ctxt->replaceEntities != 0) &&
4532 ((ctxt->myDoc == NULL) ||
4533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4534 if (ctxt->myDoc == NULL) {
4535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4536 }
4537
4538 if (ctxt->myDoc->intSubset == NULL)
4539 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4540 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004541 xmlSAX2EntityDecl(ctxt, name,
4542 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4543 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004544 }
Owen Taylor3473f882001-02-23 17:55:21 +00004545 }
4546 }
4547 }
4548 SKIP_BLANKS;
4549 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004551 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 } else {
4553 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004554 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4555 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004556 }
4557 NEXT;
4558 }
4559 if (orig != NULL) {
4560 /*
4561 * Ugly mechanism to save the raw entity value.
4562 */
4563 xmlEntityPtr cur = NULL;
4564
4565 if (isParameter) {
4566 if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->getParameterEntity != NULL))
4568 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4569 } else {
4570 if ((ctxt->sax != NULL) &&
4571 (ctxt->sax->getEntity != NULL))
4572 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004573 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004574 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004575 }
Owen Taylor3473f882001-02-23 17:55:21 +00004576 }
4577 if (cur != NULL) {
4578 if (cur->orig != NULL)
4579 xmlFree(orig);
4580 else
4581 cur->orig = orig;
4582 } else
4583 xmlFree(orig);
4584 }
Owen Taylor3473f882001-02-23 17:55:21 +00004585 if (value != NULL) xmlFree(value);
4586 if (URI != NULL) xmlFree(URI);
4587 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 }
4589}
4590
4591/**
4592 * xmlParseDefaultDecl:
4593 * @ctxt: an XML parser context
4594 * @value: Receive a possible fixed default value for the attribute
4595 *
4596 * Parse an attribute default declaration
4597 *
4598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4599 *
4600 * [ VC: Required Attribute ]
4601 * if the default declaration is the keyword #REQUIRED, then the
4602 * attribute must be specified for all elements of the type in the
4603 * attribute-list declaration.
4604 *
4605 * [ VC: Attribute Default Legal ]
4606 * The declared default value must meet the lexical constraints of
4607 * the declared attribute type c.f. xmlValidateAttributeDecl()
4608 *
4609 * [ VC: Fixed Attribute Default ]
4610 * if an attribute has a default value declared with the #FIXED
4611 * keyword, instances of that attribute must match the default value.
4612 *
4613 * [ WFC: No < in Attribute Values ]
4614 * handled in xmlParseAttValue()
4615 *
4616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4617 * or XML_ATTRIBUTE_FIXED.
4618 */
4619
4620int
4621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4622 int val;
4623 xmlChar *ret;
4624
4625 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004626 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004627 SKIP(9);
4628 return(XML_ATTRIBUTE_REQUIRED);
4629 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004630 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004631 SKIP(8);
4632 return(XML_ATTRIBUTE_IMPLIED);
4633 }
4634 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004635 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004636 SKIP(6);
4637 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004638 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4640 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 SKIP_BLANKS;
4643 }
4644 ret = xmlParseAttValue(ctxt);
4645 ctxt->instate = XML_PARSER_DTD;
4646 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004647 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004648 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 } else
4650 *value = ret;
4651 return(val);
4652}
4653
4654/**
4655 * xmlParseNotationType:
4656 * @ctxt: an XML parser context
4657 *
4658 * parse an Notation attribute type.
4659 *
4660 * Note: the leading 'NOTATION' S part has already being parsed...
4661 *
4662 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4663 *
4664 * [ VC: Notation Attributes ]
4665 * Values of this type must match one of the notation names included
4666 * in the declaration; all notation names in the declaration must be declared.
4667 *
4668 * Returns: the notation attribute tree built while parsing
4669 */
4670
4671xmlEnumerationPtr
4672xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004673 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4675
4676 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004677 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 return(NULL);
4679 }
4680 SHRINK;
4681 do {
4682 NEXT;
4683 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004684 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4687 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(ret);
4689 }
4690 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if (cur == NULL) return(ret);
4692 if (last == NULL) ret = last = cur;
4693 else {
4694 last->next = cur;
4695 last = cur;
4696 }
4697 SKIP_BLANKS;
4698 } while (RAW == '|');
4699 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 if ((last != NULL) && (last != ret))
4702 xmlFreeEnumeration(last);
4703 return(ret);
4704 }
4705 NEXT;
4706 return(ret);
4707}
4708
4709/**
4710 * xmlParseEnumerationType:
4711 * @ctxt: an XML parser context
4712 *
4713 * parse an Enumeration attribute type.
4714 *
4715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4716 *
4717 * [ VC: Enumeration ]
4718 * Values of this type must match one of the Nmtoken tokens in
4719 * the declaration
4720 *
4721 * Returns: the enumeration attribute tree built while parsing
4722 */
4723
4724xmlEnumerationPtr
4725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4726 xmlChar *name;
4727 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4728
4729 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004731 return(NULL);
4732 }
4733 SHRINK;
4734 do {
4735 NEXT;
4736 SKIP_BLANKS;
4737 name = xmlParseNmtoken(ctxt);
4738 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 return(ret);
4741 }
4742 cur = xmlCreateEnumeration(name);
4743 xmlFree(name);
4744 if (cur == NULL) return(ret);
4745 if (last == NULL) ret = last = cur;
4746 else {
4747 last->next = cur;
4748 last = cur;
4749 }
4750 SKIP_BLANKS;
4751 } while (RAW == '|');
4752 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004753 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004754 return(ret);
4755 }
4756 NEXT;
4757 return(ret);
4758}
4759
4760/**
4761 * xmlParseEnumeratedType:
4762 * @ctxt: an XML parser context
4763 * @tree: the enumeration tree built while parsing
4764 *
4765 * parse an Enumerated attribute type.
4766 *
4767 * [57] EnumeratedType ::= NotationType | Enumeration
4768 *
4769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4770 *
4771 *
4772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4773 */
4774
4775int
4776xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004777 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004778 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004779 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4781 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004782 return(0);
4783 }
4784 SKIP_BLANKS;
4785 *tree = xmlParseNotationType(ctxt);
4786 if (*tree == NULL) return(0);
4787 return(XML_ATTRIBUTE_NOTATION);
4788 }
4789 *tree = xmlParseEnumerationType(ctxt);
4790 if (*tree == NULL) return(0);
4791 return(XML_ATTRIBUTE_ENUMERATION);
4792}
4793
4794/**
4795 * xmlParseAttributeType:
4796 * @ctxt: an XML parser context
4797 * @tree: the enumeration tree built while parsing
4798 *
4799 * parse the Attribute list def for an element
4800 *
4801 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4802 *
4803 * [55] StringType ::= 'CDATA'
4804 *
4805 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4806 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4807 *
4808 * Validity constraints for attribute values syntax are checked in
4809 * xmlValidateAttributeValue()
4810 *
4811 * [ VC: ID ]
4812 * Values of type ID must match the Name production. A name must not
4813 * appear more than once in an XML document as a value of this type;
4814 * i.e., ID values must uniquely identify the elements which bear them.
4815 *
4816 * [ VC: One ID per Element Type ]
4817 * No element type may have more than one ID attribute specified.
4818 *
4819 * [ VC: ID Attribute Default ]
4820 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4821 *
4822 * [ VC: IDREF ]
4823 * Values of type IDREF must match the Name production, and values
4824 * of type IDREFS must match Names; each IDREF Name must match the value
4825 * of an ID attribute on some element in the XML document; i.e. IDREF
4826 * values must match the value of some ID attribute.
4827 *
4828 * [ VC: Entity Name ]
4829 * Values of type ENTITY must match the Name production, values
4830 * of type ENTITIES must match Names; each Entity Name must match the
4831 * name of an unparsed entity declared in the DTD.
4832 *
4833 * [ VC: Name Token ]
4834 * Values of type NMTOKEN must match the Nmtoken production; values
4835 * of type NMTOKENS must match Nmtokens.
4836 *
4837 * Returns the attribute type
4838 */
4839int
4840xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4841 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004842 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004843 SKIP(5);
4844 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004845 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004846 SKIP(6);
4847 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004848 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004849 SKIP(5);
4850 return(XML_ATTRIBUTE_IDREF);
4851 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4852 SKIP(2);
4853 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004854 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004855 SKIP(6);
4856 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004857 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004858 SKIP(8);
4859 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004860 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004861 SKIP(8);
4862 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004863 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004864 SKIP(7);
4865 return(XML_ATTRIBUTE_NMTOKEN);
4866 }
4867 return(xmlParseEnumeratedType(ctxt, tree));
4868}
4869
4870/**
4871 * xmlParseAttributeListDecl:
4872 * @ctxt: an XML parser context
4873 *
4874 * : parse the Attribute list def for an element
4875 *
4876 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4877 *
4878 * [53] AttDef ::= S Name S AttType S DefaultDecl
4879 *
4880 */
4881void
4882xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004883 const xmlChar *elemName;
4884 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004885 xmlEnumerationPtr tree;
4886
Daniel Veillarda07050d2003-10-19 14:46:32 +00004887 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004888 xmlParserInputPtr input = ctxt->input;
4889
4890 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004891 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004893 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 }
4895 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004896 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004897 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4899 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004900 return;
4901 }
4902 SKIP_BLANKS;
4903 GROW;
4904 while (RAW != '>') {
4905 const xmlChar *check = CUR_PTR;
4906 int type;
4907 int def;
4908 xmlChar *defaultValue = NULL;
4909
4910 GROW;
4911 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004912 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004913 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4915 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004916 break;
4917 }
4918 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004921 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004922 break;
4923 }
4924 SKIP_BLANKS;
4925
4926 type = xmlParseAttributeType(ctxt, &tree);
4927 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004928 break;
4929 }
4930
4931 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004932 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4934 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004935 if (tree != NULL)
4936 xmlFreeEnumeration(tree);
4937 break;
4938 }
4939 SKIP_BLANKS;
4940
4941 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4942 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if (defaultValue != NULL)
4944 xmlFree(defaultValue);
4945 if (tree != NULL)
4946 xmlFreeEnumeration(tree);
4947 break;
4948 }
4949
4950 GROW;
4951 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004952 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004954 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004955 if (defaultValue != NULL)
4956 xmlFree(defaultValue);
4957 if (tree != NULL)
4958 xmlFreeEnumeration(tree);
4959 break;
4960 }
4961 SKIP_BLANKS;
4962 }
4963 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004964 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4965 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004966 if (defaultValue != NULL)
4967 xmlFree(defaultValue);
4968 if (tree != NULL)
4969 xmlFreeEnumeration(tree);
4970 break;
4971 }
4972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4973 (ctxt->sax->attributeDecl != NULL))
4974 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4975 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004976 else if (tree != NULL)
4977 xmlFreeEnumeration(tree);
4978
4979 if ((ctxt->sax2) && (defaultValue != NULL) &&
4980 (def != XML_ATTRIBUTE_IMPLIED) &&
4981 (def != XML_ATTRIBUTE_REQUIRED)) {
4982 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4983 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004984 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4985 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4986 }
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (defaultValue != NULL)
4988 xmlFree(defaultValue);
4989 GROW;
4990 }
4991 if (RAW == '>') {
4992 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004993 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4994 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004995 }
4996 NEXT;
4997 }
Owen Taylor3473f882001-02-23 17:55:21 +00004998 }
4999}
5000
5001/**
5002 * xmlParseElementMixedContentDecl:
5003 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005004 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005005 *
5006 * parse the declaration for a Mixed Element content
5007 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5008 *
5009 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5010 * '(' S? '#PCDATA' S? ')'
5011 *
5012 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5013 *
5014 * [ VC: No Duplicate Types ]
5015 * The same name must not appear more than once in a single
5016 * mixed-content declaration.
5017 *
5018 * returns: the list of the xmlElementContentPtr describing the element choices
5019 */
5020xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005021xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005022 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005023 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005024
5025 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005026 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005027 SKIP(7);
5028 SKIP_BLANKS;
5029 SHRINK;
5030 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005031 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005032 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5033"Element content declaration doesn't start and stop in the same entity\n",
5034 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005035 }
Owen Taylor3473f882001-02-23 17:55:21 +00005036 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005037 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005038 if (RAW == '*') {
5039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5040 NEXT;
5041 }
5042 return(ret);
5043 }
5044 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005045 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005046 if (ret == NULL) return(NULL);
5047 }
5048 while (RAW == '|') {
5049 NEXT;
5050 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005051 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 if (ret == NULL) return(NULL);
5053 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005054 if (cur != NULL)
5055 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005056 cur = ret;
5057 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005058 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005059 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005060 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005061 if (n->c1 != NULL)
5062 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005063 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005064 if (n != NULL)
5065 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005066 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 }
5068 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005069 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005071 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005072 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005073 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005074 return(NULL);
5075 }
5076 SKIP_BLANKS;
5077 GROW;
5078 }
5079 if ((RAW == ')') && (NXT(1) == '*')) {
5080 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005081 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005082 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005083 if (cur->c2 != NULL)
5084 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005085 }
5086 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005088 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5089"Element content declaration doesn't start and stop in the same entity\n",
5090 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 SKIP(2);
5093 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005094 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005095 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005096 return(NULL);
5097 }
5098
5099 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005100 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005101 }
5102 return(ret);
5103}
5104
5105/**
5106 * xmlParseElementChildrenContentDecl:
5107 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005108 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005109 *
5110 * parse the declaration for a Mixed Element content
5111 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5112 *
5113 *
5114 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5115 *
5116 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5117 *
5118 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5119 *
5120 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5121 *
5122 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5123 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005124 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005125 * opening or closing parentheses in a choice, seq, or Mixed
5126 * construct is contained in the replacement text for a parameter
5127 * entity, both must be contained in the same replacement text. For
5128 * interoperability, if a parameter-entity reference appears in a
5129 * choice, seq, or Mixed construct, its replacement text should not
5130 * be empty, and neither the first nor last non-blank character of
5131 * the replacement text should be a connector (| or ,).
5132 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005133 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005134 * hierarchy.
5135 */
5136xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005137xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005138 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005139 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005140 xmlChar type = 0;
5141
5142 SKIP_BLANKS;
5143 GROW;
5144 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005146
Owen Taylor3473f882001-02-23 17:55:21 +00005147 /* Recurse on first child */
5148 NEXT;
5149 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005150 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005151 SKIP_BLANKS;
5152 GROW;
5153 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005156 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 return(NULL);
5158 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005159 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005160 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005161 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005162 return(NULL);
5163 }
Owen Taylor3473f882001-02-23 17:55:21 +00005164 GROW;
5165 if (RAW == '?') {
5166 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5167 NEXT;
5168 } else if (RAW == '*') {
5169 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5170 NEXT;
5171 } else if (RAW == '+') {
5172 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5173 NEXT;
5174 } else {
5175 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5176 }
Owen Taylor3473f882001-02-23 17:55:21 +00005177 GROW;
5178 }
5179 SKIP_BLANKS;
5180 SHRINK;
5181 while (RAW != ')') {
5182 /*
5183 * Each loop we parse one separator and one element.
5184 */
5185 if (RAW == ',') {
5186 if (type == 0) type = CUR;
5187
5188 /*
5189 * Detect "Name | Name , Name" error
5190 */
5191 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005193 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005194 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005195 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005196 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005198 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005199 return(NULL);
5200 }
5201 NEXT;
5202
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005203 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005205 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005206 xmlFreeDocElementContent(ctxt->myDoc, last);
5207 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005208 return(NULL);
5209 }
5210 if (last == NULL) {
5211 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005212 if (ret != NULL)
5213 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005214 ret = cur = op;
5215 } else {
5216 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005217 if (op != NULL)
5218 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005219 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005220 if (last != NULL)
5221 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005222 cur =op;
5223 last = NULL;
5224 }
5225 } else if (RAW == '|') {
5226 if (type == 0) type = CUR;
5227
5228 /*
5229 * Detect "Name , Name | Name" error
5230 */
5231 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005232 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005233 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005234 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005235 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005238 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005239 return(NULL);
5240 }
5241 NEXT;
5242
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005243 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005245 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005246 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005248 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005249 return(NULL);
5250 }
5251 if (last == NULL) {
5252 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005253 if (ret != NULL)
5254 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005255 ret = cur = op;
5256 } else {
5257 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005258 if (op != NULL)
5259 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005260 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005261 if (last != NULL)
5262 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005263 cur =op;
5264 last = NULL;
5265 }
5266 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005267 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005269 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return(NULL);
5271 }
5272 GROW;
5273 SKIP_BLANKS;
5274 GROW;
5275 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005276 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 /* Recurse on second child */
5278 NEXT;
5279 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005280 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005281 SKIP_BLANKS;
5282 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005283 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005285 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005287 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005288 return(NULL);
5289 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005290 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005291 if (RAW == '?') {
5292 last->ocur = XML_ELEMENT_CONTENT_OPT;
5293 NEXT;
5294 } else if (RAW == '*') {
5295 last->ocur = XML_ELEMENT_CONTENT_MULT;
5296 NEXT;
5297 } else if (RAW == '+') {
5298 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5299 NEXT;
5300 } else {
5301 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5302 }
5303 }
5304 SKIP_BLANKS;
5305 GROW;
5306 }
5307 if ((cur != NULL) && (last != NULL)) {
5308 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005309 if (last != NULL)
5310 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005311 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005312 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005313 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5314"Element content declaration doesn't start and stop in the same entity\n",
5315 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005316 }
Owen Taylor3473f882001-02-23 17:55:21 +00005317 NEXT;
5318 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005319 if (ret != NULL) {
5320 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5321 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5322 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5323 else
5324 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5325 }
Owen Taylor3473f882001-02-23 17:55:21 +00005326 NEXT;
5327 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005328 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005329 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005330 cur = ret;
5331 /*
5332 * Some normalization:
5333 * (a | b* | c?)* == (a | b | c)*
5334 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005335 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005336 if ((cur->c1 != NULL) &&
5337 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5338 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5339 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5340 if ((cur->c2 != NULL) &&
5341 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5342 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5343 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5344 cur = cur->c2;
5345 }
5346 }
Owen Taylor3473f882001-02-23 17:55:21 +00005347 NEXT;
5348 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005349 if (ret != NULL) {
5350 int found = 0;
5351
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005352 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005355 else
5356 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005357 /*
5358 * Some normalization:
5359 * (a | b*)+ == (a | b)*
5360 * (a | b?)+ == (a | b)*
5361 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005363 if ((cur->c1 != NULL) &&
5364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5367 found = 1;
5368 }
5369 if ((cur->c2 != NULL) &&
5370 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5371 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5372 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5373 found = 1;
5374 }
5375 cur = cur->c2;
5376 }
5377 if (found)
5378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5379 }
Owen Taylor3473f882001-02-23 17:55:21 +00005380 NEXT;
5381 }
5382 return(ret);
5383}
5384
5385/**
5386 * xmlParseElementContentDecl:
5387 * @ctxt: an XML parser context
5388 * @name: the name of the element being defined.
5389 * @result: the Element Content pointer will be stored here if any
5390 *
5391 * parse the declaration for an Element content either Mixed or Children,
5392 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5393 *
5394 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5395 *
5396 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5397 */
5398
5399int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005400xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005401 xmlElementContentPtr *result) {
5402
5403 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005404 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005405 int res;
5406
5407 *result = NULL;
5408
5409 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005410 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005411 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 return(-1);
5413 }
5414 NEXT;
5415 GROW;
5416 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005417 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005418 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005419 res = XML_ELEMENT_TYPE_MIXED;
5420 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005421 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005422 res = XML_ELEMENT_TYPE_ELEMENT;
5423 }
Owen Taylor3473f882001-02-23 17:55:21 +00005424 SKIP_BLANKS;
5425 *result = tree;
5426 return(res);
5427}
5428
5429/**
5430 * xmlParseElementDecl:
5431 * @ctxt: an XML parser context
5432 *
5433 * parse an Element declaration.
5434 *
5435 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5436 *
5437 * [ VC: Unique Element Type Declaration ]
5438 * No element type may be declared more than once
5439 *
5440 * Returns the type of the element, or -1 in case of error
5441 */
5442int
5443xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005444 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005445 int ret = -1;
5446 xmlElementContentPtr content = NULL;
5447
Daniel Veillard4c778d82005-01-23 17:37:44 +00005448 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005449 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005450 xmlParserInputPtr input = ctxt->input;
5451
5452 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005453 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005456 }
5457 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005458 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005459 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5461 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005462 return(-1);
5463 }
5464 while ((RAW == 0) && (ctxt->inputNr > 1))
5465 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005466 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005469 }
5470 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005471 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005472 SKIP(5);
5473 /*
5474 * Element must always be empty.
5475 */
5476 ret = XML_ELEMENT_TYPE_EMPTY;
5477 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5478 (NXT(2) == 'Y')) {
5479 SKIP(3);
5480 /*
5481 * Element is a generic container.
5482 */
5483 ret = XML_ELEMENT_TYPE_ANY;
5484 } else if (RAW == '(') {
5485 ret = xmlParseElementContentDecl(ctxt, name, &content);
5486 } else {
5487 /*
5488 * [ WFC: PEs in Internal Subset ] error handling.
5489 */
5490 if ((RAW == '%') && (ctxt->external == 0) &&
5491 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005492 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005493 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005494 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005495 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005496 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5497 }
Owen Taylor3473f882001-02-23 17:55:21 +00005498 return(-1);
5499 }
5500
5501 SKIP_BLANKS;
5502 /*
5503 * Pop-up of finished entities.
5504 */
5505 while ((RAW == 0) && (ctxt->inputNr > 1))
5506 xmlPopInput(ctxt);
5507 SKIP_BLANKS;
5508
5509 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005510 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005511 if (content != NULL) {
5512 xmlFreeDocElementContent(ctxt->myDoc, content);
5513 }
Owen Taylor3473f882001-02-23 17:55:21 +00005514 } else {
5515 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005516 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5517 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519
5520 NEXT;
5521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005522 (ctxt->sax->elementDecl != NULL)) {
5523 if (content != NULL)
5524 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5526 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005527 if ((content != NULL) && (content->parent == NULL)) {
5528 /*
5529 * this is a trick: if xmlAddElementDecl is called,
5530 * instead of copying the full tree it is plugged directly
5531 * if called from the parser. Avoid duplicating the
5532 * interfaces or change the API/ABI
5533 */
5534 xmlFreeDocElementContent(ctxt->myDoc, content);
5535 }
5536 } else if (content != NULL) {
5537 xmlFreeDocElementContent(ctxt->myDoc, content);
5538 }
Owen Taylor3473f882001-02-23 17:55:21 +00005539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
5541 return(ret);
5542}
5543
5544/**
Owen Taylor3473f882001-02-23 17:55:21 +00005545 * xmlParseConditionalSections
5546 * @ctxt: an XML parser context
5547 *
5548 * [61] conditionalSect ::= includeSect | ignoreSect
5549 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5550 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5551 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5552 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5553 */
5554
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005555static void
Owen Taylor3473f882001-02-23 17:55:21 +00005556xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5557 SKIP(3);
5558 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005559 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005560 SKIP(7);
5561 SKIP_BLANKS;
5562 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 } else {
5565 NEXT;
5566 }
5567 if (xmlParserDebugEntities) {
5568 if ((ctxt->input != NULL) && (ctxt->input->filename))
5569 xmlGenericError(xmlGenericErrorContext,
5570 "%s(%d): ", ctxt->input->filename,
5571 ctxt->input->line);
5572 xmlGenericError(xmlGenericErrorContext,
5573 "Entering INCLUDE Conditional Section\n");
5574 }
5575
5576 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5577 (NXT(2) != '>'))) {
5578 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005579 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005580
5581 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5582 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005583 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005584 NEXT;
5585 } else if (RAW == '%') {
5586 xmlParsePEReference(ctxt);
5587 } else
5588 xmlParseMarkupDecl(ctxt);
5589
5590 /*
5591 * Pop-up of finished entities.
5592 */
5593 while ((RAW == 0) && (ctxt->inputNr > 1))
5594 xmlPopInput(ctxt);
5595
Daniel Veillardfdc91562002-07-01 21:52:03 +00005596 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005597 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005598 break;
5599 }
5600 }
5601 if (xmlParserDebugEntities) {
5602 if ((ctxt->input != NULL) && (ctxt->input->filename))
5603 xmlGenericError(xmlGenericErrorContext,
5604 "%s(%d): ", ctxt->input->filename,
5605 ctxt->input->line);
5606 xmlGenericError(xmlGenericErrorContext,
5607 "Leaving INCLUDE Conditional Section\n");
5608 }
5609
Daniel Veillarda07050d2003-10-19 14:46:32 +00005610 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005611 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005612 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005613 int depth = 0;
5614
5615 SKIP(6);
5616 SKIP_BLANKS;
5617 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005618 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005619 } else {
5620 NEXT;
5621 }
5622 if (xmlParserDebugEntities) {
5623 if ((ctxt->input != NULL) && (ctxt->input->filename))
5624 xmlGenericError(xmlGenericErrorContext,
5625 "%s(%d): ", ctxt->input->filename,
5626 ctxt->input->line);
5627 xmlGenericError(xmlGenericErrorContext,
5628 "Entering IGNORE Conditional Section\n");
5629 }
5630
5631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005632 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005633 * But disable SAX event generating DTD building in the meantime
5634 */
5635 state = ctxt->disableSAX;
5636 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005637 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005638 ctxt->instate = XML_PARSER_IGNORE;
5639
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005640 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5642 depth++;
5643 SKIP(3);
5644 continue;
5645 }
5646 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5647 if (--depth >= 0) SKIP(3);
5648 continue;
5649 }
5650 NEXT;
5651 continue;
5652 }
5653
5654 ctxt->disableSAX = state;
5655 ctxt->instate = instate;
5656
5657 if (xmlParserDebugEntities) {
5658 if ((ctxt->input != NULL) && (ctxt->input->filename))
5659 xmlGenericError(xmlGenericErrorContext,
5660 "%s(%d): ", ctxt->input->filename,
5661 ctxt->input->line);
5662 xmlGenericError(xmlGenericErrorContext,
5663 "Leaving IGNORE Conditional Section\n");
5664 }
5665
5666 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005667 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669
5670 if (RAW == 0)
5671 SHRINK;
5672
5673 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005674 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005675 } else {
5676 SKIP(3);
5677 }
5678}
5679
5680/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005681 * xmlParseMarkupDecl:
5682 * @ctxt: an XML parser context
5683 *
5684 * parse Markup declarations
5685 *
5686 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5687 * NotationDecl | PI | Comment
5688 *
5689 * [ VC: Proper Declaration/PE Nesting ]
5690 * Parameter-entity replacement text must be properly nested with
5691 * markup declarations. That is to say, if either the first character
5692 * or the last character of a markup declaration (markupdecl above) is
5693 * contained in the replacement text for a parameter-entity reference,
5694 * both must be contained in the same replacement text.
5695 *
5696 * [ WFC: PEs in Internal Subset ]
5697 * In the internal DTD subset, parameter-entity references can occur
5698 * only where markup declarations can occur, not within markup declarations.
5699 * (This does not apply to references that occur in external parameter
5700 * entities or to the external subset.)
5701 */
5702void
5703xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5704 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005705 if (CUR == '<') {
5706 if (NXT(1) == '!') {
5707 switch (NXT(2)) {
5708 case 'E':
5709 if (NXT(3) == 'L')
5710 xmlParseElementDecl(ctxt);
5711 else if (NXT(3) == 'N')
5712 xmlParseEntityDecl(ctxt);
5713 break;
5714 case 'A':
5715 xmlParseAttributeListDecl(ctxt);
5716 break;
5717 case 'N':
5718 xmlParseNotationDecl(ctxt);
5719 break;
5720 case '-':
5721 xmlParseComment(ctxt);
5722 break;
5723 default:
5724 /* there is an error but it will be detected later */
5725 break;
5726 }
5727 } else if (NXT(1) == '?') {
5728 xmlParsePI(ctxt);
5729 }
5730 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005731 /*
5732 * This is only for internal subset. On external entities,
5733 * the replacement is done before parsing stage
5734 */
5735 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5736 xmlParsePEReference(ctxt);
5737
5738 /*
5739 * Conditional sections are allowed from entities included
5740 * by PE References in the internal subset.
5741 */
5742 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5744 xmlParseConditionalSections(ctxt);
5745 }
5746 }
5747
5748 ctxt->instate = XML_PARSER_DTD;
5749}
5750
5751/**
5752 * xmlParseTextDecl:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse an XML declaration header for external entities
5756 *
5757 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5758 *
5759 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5760 */
5761
5762void
5763xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5764 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005765 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005766
5767 /*
5768 * We know that '<?xml' is here.
5769 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005770 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005771 SKIP(5);
5772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005774 return;
5775 }
5776
William M. Brack76e95df2003-10-18 16:20:14 +00005777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005780 }
5781 SKIP_BLANKS;
5782
5783 /*
5784 * We may have the VersionInfo here.
5785 */
5786 version = xmlParseVersionInfo(ctxt);
5787 if (version == NULL)
5788 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005789 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005790 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005793 }
5794 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005795 ctxt->input->version = version;
5796
5797 /*
5798 * We must have the encoding declaration
5799 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005800 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005801 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5802 /*
5803 * The XML REC instructs us to stop parsing right here
5804 */
5805 return;
5806 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005807 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5808 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5809 "Missing encoding in text declaration\n");
5810 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005811
5812 SKIP_BLANKS;
5813 if ((RAW == '?') && (NXT(1) == '>')) {
5814 SKIP(2);
5815 } else if (RAW == '>') {
5816 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005817 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005818 NEXT;
5819 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005820 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005821 MOVETO_ENDTAG(CUR_PTR);
5822 NEXT;
5823 }
5824}
5825
5826/**
Owen Taylor3473f882001-02-23 17:55:21 +00005827 * xmlParseExternalSubset:
5828 * @ctxt: an XML parser context
5829 * @ExternalID: the external identifier
5830 * @SystemID: the system identifier (or URL)
5831 *
5832 * parse Markup declarations from an external subset
5833 *
5834 * [30] extSubset ::= textDecl? extSubsetDecl
5835 *
5836 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5837 */
5838void
5839xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5840 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005841 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005842 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005843 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005844 xmlParseTextDecl(ctxt);
5845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5846 /*
5847 * The XML REC instructs us to stop parsing right here
5848 */
5849 ctxt->instate = XML_PARSER_EOF;
5850 return;
5851 }
5852 }
5853 if (ctxt->myDoc == NULL) {
5854 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5855 }
5856 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5857 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5858
5859 ctxt->instate = XML_PARSER_DTD;
5860 ctxt->external = 1;
5861 while (((RAW == '<') && (NXT(1) == '?')) ||
5862 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005863 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005864 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005865 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005866
5867 GROW;
5868 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5869 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005870 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005871 NEXT;
5872 } else if (RAW == '%') {
5873 xmlParsePEReference(ctxt);
5874 } else
5875 xmlParseMarkupDecl(ctxt);
5876
5877 /*
5878 * Pop-up of finished entities.
5879 */
5880 while ((RAW == 0) && (ctxt->inputNr > 1))
5881 xmlPopInput(ctxt);
5882
Daniel Veillardfdc91562002-07-01 21:52:03 +00005883 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005884 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 break;
5886 }
5887 }
5888
5889 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005890 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 }
5892
5893}
5894
5895/**
5896 * xmlParseReference:
5897 * @ctxt: an XML parser context
5898 *
5899 * parse and handle entity references in content, depending on the SAX
5900 * interface, this may end-up in a call to character() if this is a
5901 * CharRef, a predefined entity, if there is no reference() callback.
5902 * or if the parser was asked to switch to that mode.
5903 *
5904 * [67] Reference ::= EntityRef | CharRef
5905 */
5906void
5907xmlParseReference(xmlParserCtxtPtr ctxt) {
5908 xmlEntityPtr ent;
5909 xmlChar *val;
5910 if (RAW != '&') return;
5911
5912 if (NXT(1) == '#') {
5913 int i = 0;
5914 xmlChar out[10];
5915 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005916 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005917
5918 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5919 /*
5920 * So we are using non-UTF-8 buffers
5921 * Check that the char fit on 8bits, if not
5922 * generate a CharRef.
5923 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005924 if (value <= 0xFF) {
5925 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005926 out[1] = 0;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5928 (!ctxt->disableSAX))
5929 ctxt->sax->characters(ctxt->userData, out, 1);
5930 } else {
5931 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005932 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005934 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5936 (!ctxt->disableSAX))
5937 ctxt->sax->reference(ctxt->userData, out);
5938 }
5939 } else {
5940 /*
5941 * Just encode the value in UTF-8
5942 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005943 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005944 out[i] = 0;
5945 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5946 (!ctxt->disableSAX))
5947 ctxt->sax->characters(ctxt->userData, out, i);
5948 }
5949 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005950 int was_checked;
5951
Owen Taylor3473f882001-02-23 17:55:21 +00005952 ent = xmlParseEntityRef(ctxt);
5953 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005954 if (!ctxt->wellFormed)
5955 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005956 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005957 if ((ent->name != NULL) &&
5958 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5959 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005960 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005961
5962
5963 /*
5964 * The first reference to the entity trigger a parsing phase
5965 * where the ent->children is filled with the result from
5966 * the parsing.
5967 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005968 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005969 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005970
Owen Taylor3473f882001-02-23 17:55:21 +00005971 value = ent->content;
5972
5973 /*
5974 * Check that this entity is well formed
5975 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005976 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005977 (value[1] == 0) && (value[0] == '<') &&
5978 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5979 /*
5980 * DONE: get definite answer on this !!!
5981 * Lots of entity decls are used to declare a single
5982 * char
5983 * <!ENTITY lt "<">
5984 * Which seems to be valid since
5985 * 2.4: The ampersand character (&) and the left angle
5986 * bracket (<) may appear in their literal form only
5987 * when used ... They are also legal within the literal
5988 * entity value of an internal entity declaration;i
5989 * see "4.3.2 Well-Formed Parsed Entities".
5990 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5991 * Looking at the OASIS test suite and James Clark
5992 * tests, this is broken. However the XML REC uses
5993 * it. Is the XML REC not well-formed ????
5994 * This is a hack to avoid this problem
5995 *
5996 * ANSWER: since lt gt amp .. are already defined,
5997 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005998 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005999 * is lousy but acceptable.
6000 */
6001 list = xmlNewDocText(ctxt->myDoc, value);
6002 if (list != NULL) {
6003 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6004 (ent->children == NULL)) {
6005 ent->children = list;
6006 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006007 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006008 list->parent = (xmlNodePtr) ent;
6009 } else {
6010 xmlFreeNodeList(list);
6011 }
6012 } else if (list != NULL) {
6013 xmlFreeNodeList(list);
6014 }
6015 } else {
6016 /*
6017 * 4.3.2: An internal general parsed entity is well-formed
6018 * if its replacement text matches the production labeled
6019 * content.
6020 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006021
6022 void *user_data;
6023 /*
6024 * This is a bit hackish but this seems the best
6025 * way to make sure both SAX and DOM entity support
6026 * behaves okay.
6027 */
6028 if (ctxt->userData == ctxt)
6029 user_data = NULL;
6030 else
6031 user_data = ctxt->userData;
6032
Owen Taylor3473f882001-02-23 17:55:21 +00006033 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6034 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006035 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6036 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006037 ctxt->depth--;
6038 } else if (ent->etype ==
6039 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6040 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006041 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006042 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006043 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006044 ctxt->depth--;
6045 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006046 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006047 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6048 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006049 }
6050 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006051 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006052 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006053 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006054 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6055 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006056 (ent->children == NULL)) {
6057 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006058 if (ctxt->replaceEntities) {
6059 /*
6060 * Prune it directly in the generated document
6061 * except for single text nodes.
6062 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006063 if (((list->type == XML_TEXT_NODE) &&
6064 (list->next == NULL)) ||
6065 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006066 list->parent = (xmlNodePtr) ent;
6067 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006068 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006069 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006070 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006071 while (list != NULL) {
6072 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006073 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006074 if (list->next == NULL)
6075 ent->last = list;
6076 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006077 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006078 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006079#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006080 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6081 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006082#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006083 }
6084 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006085 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006086 while (list != NULL) {
6087 list->parent = (xmlNodePtr) ent;
6088 if (list->next == NULL)
6089 ent->last = list;
6090 list = list->next;
6091 }
Owen Taylor3473f882001-02-23 17:55:21 +00006092 }
6093 } else {
6094 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006095 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006096 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006097 } else if ((ret != XML_ERR_OK) &&
6098 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006099 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 } else if (list != NULL) {
6101 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006102 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006105 ent->checked = 1;
6106 }
6107
6108 if (ent->children == NULL) {
6109 /*
6110 * Probably running in SAX mode and the callbacks don't
6111 * build the entity content. So unless we already went
6112 * though parsing for first checking go though the entity
6113 * content to generate callbacks associated to the entity
6114 */
6115 if (was_checked == 1) {
6116 void *user_data;
6117 /*
6118 * This is a bit hackish but this seems the best
6119 * way to make sure both SAX and DOM entity support
6120 * behaves okay.
6121 */
6122 if (ctxt->userData == ctxt)
6123 user_data = NULL;
6124 else
6125 user_data = ctxt->userData;
6126
6127 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6128 ctxt->depth++;
6129 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6130 ent->content, user_data, NULL);
6131 ctxt->depth--;
6132 } else if (ent->etype ==
6133 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6134 ctxt->depth++;
6135 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6136 ctxt->sax, user_data, ctxt->depth,
6137 ent->URI, ent->ExternalID, NULL);
6138 ctxt->depth--;
6139 } else {
6140 ret = XML_ERR_ENTITY_PE_INTERNAL;
6141 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6142 "invalid entity type found\n", NULL);
6143 }
6144 if (ret == XML_ERR_ENTITY_LOOP) {
6145 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6146 return;
6147 }
6148 }
6149 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6150 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6151 /*
6152 * Entity reference callback comes second, it's somewhat
6153 * superfluous but a compatibility to historical behaviour
6154 */
6155 ctxt->sax->reference(ctxt->userData, ent->name);
6156 }
6157 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 }
6159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006160 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006161 /*
6162 * Create a node.
6163 */
6164 ctxt->sax->reference(ctxt->userData, ent->name);
6165 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006166 }
6167 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006168 /*
6169 * There is a problem on the handling of _private for entities
6170 * (bug 155816): Should we copy the content of the field from
6171 * the entity (possibly overwriting some value set by the user
6172 * when a copy is created), should we leave it alone, or should
6173 * we try to take care of different situations? The problem
6174 * is exacerbated by the usage of this field by the xmlReader.
6175 * To fix this bug, we look at _private on the created node
6176 * and, if it's NULL, we copy in whatever was in the entity.
6177 * If it's not NULL we leave it alone. This is somewhat of a
6178 * hack - maybe we should have further tests to determine
6179 * what to do.
6180 */
Owen Taylor3473f882001-02-23 17:55:21 +00006181 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6182 /*
6183 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006184 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006185 * In the first occurrence list contains the replacement.
6186 * progressive == 2 means we are operating on the Reader
6187 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006188 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006189 if (((list == NULL) && (ent->owner == 0)) ||
6190 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006191 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006192
6193 /*
6194 * when operating on a reader, the entities definitions
6195 * are always owning the entities subtree.
6196 if (ctxt->parseMode == XML_PARSE_READER)
6197 ent->owner = 1;
6198 */
6199
Daniel Veillard62f313b2001-07-04 19:49:14 +00006200 cur = ent->children;
6201 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006203 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006204 if (nw->_private == NULL)
6205 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006206 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006207 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006208 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006209 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006210 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006211 if (cur == ent->last) {
6212 /*
6213 * needed to detect some strange empty
6214 * node cases in the reader tests
6215 */
6216 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006217 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006218 (nw->type == XML_ELEMENT_NODE) &&
6219 (nw->children == NULL))
6220 nw->extra = 1;
6221
Daniel Veillard62f313b2001-07-04 19:49:14 +00006222 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006223 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006224 cur = cur->next;
6225 }
Daniel Veillard81273902003-09-30 00:43:48 +00006226#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006227 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006228 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006229#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006230 } else if (list == NULL) {
6231 xmlNodePtr nw = NULL, cur, next, last,
6232 firstChild = NULL;
6233 /*
6234 * Copy the entity child list and make it the new
6235 * entity child list. The goal is to make sure any
6236 * ID or REF referenced will be the one from the
6237 * document content and not the entity copy.
6238 */
6239 cur = ent->children;
6240 ent->children = NULL;
6241 last = ent->last;
6242 ent->last = NULL;
6243 while (cur != NULL) {
6244 next = cur->next;
6245 cur->next = NULL;
6246 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006247 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006248 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006249 if (nw->_private == NULL)
6250 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006251 if (firstChild == NULL){
6252 firstChild = cur;
6253 }
6254 xmlAddChild((xmlNodePtr) ent, nw);
6255 xmlAddChild(ctxt->node, cur);
6256 }
6257 if (cur == last)
6258 break;
6259 cur = next;
6260 }
6261 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006262#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6264 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006265#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006266 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006267 const xmlChar *nbktext;
6268
Daniel Veillard62f313b2001-07-04 19:49:14 +00006269 /*
6270 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006271 * node with a possible previous text one which
6272 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006273 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006274 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6275 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006277 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006278 if ((ent->last != ent->children) &&
6279 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006280 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006281 xmlAddChildList(ctxt->node, ent->children);
6282 }
6283
Owen Taylor3473f882001-02-23 17:55:21 +00006284 /*
6285 * This is to avoid a nasty side effect, see
6286 * characters() in SAX.c
6287 */
6288 ctxt->nodemem = 0;
6289 ctxt->nodelen = 0;
6290 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006291 }
6292 }
6293 } else {
6294 val = ent->content;
6295 if (val == NULL) return;
6296 /*
6297 * inline the entity.
6298 */
6299 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6300 (!ctxt->disableSAX))
6301 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6302 }
6303 }
6304}
6305
6306/**
6307 * xmlParseEntityRef:
6308 * @ctxt: an XML parser context
6309 *
6310 * parse ENTITY references declarations
6311 *
6312 * [68] EntityRef ::= '&' Name ';'
6313 *
6314 * [ WFC: Entity Declared ]
6315 * In a document without any DTD, a document with only an internal DTD
6316 * subset which contains no parameter entity references, or a document
6317 * with "standalone='yes'", the Name given in the entity reference
6318 * must match that in an entity declaration, except that well-formed
6319 * documents need not declare any of the following entities: amp, lt,
6320 * gt, apos, quot. The declaration of a parameter entity must precede
6321 * any reference to it. Similarly, the declaration of a general entity
6322 * must precede any reference to it which appears in a default value in an
6323 * attribute-list declaration. Note that if entities are declared in the
6324 * external subset or in external parameter entities, a non-validating
6325 * processor is not obligated to read and process their declarations;
6326 * for such documents, the rule that an entity must be declared is a
6327 * well-formedness constraint only if standalone='yes'.
6328 *
6329 * [ WFC: Parsed Entity ]
6330 * An entity reference must not contain the name of an unparsed entity
6331 *
6332 * Returns the xmlEntityPtr if found, or NULL otherwise.
6333 */
6334xmlEntityPtr
6335xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 xmlEntityPtr ent = NULL;
6338
6339 GROW;
6340
6341 if (RAW == '&') {
6342 NEXT;
6343 name = xmlParseName(ctxt);
6344 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006345 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6346 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006347 } else {
6348 if (RAW == ';') {
6349 NEXT;
6350 /*
6351 * Ask first SAX for entity resolution, otherwise try the
6352 * predefined set.
6353 */
6354 if (ctxt->sax != NULL) {
6355 if (ctxt->sax->getEntity != NULL)
6356 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006357 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006358 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006359 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6360 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006361 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006362 }
Owen Taylor3473f882001-02-23 17:55:21 +00006363 }
6364 /*
6365 * [ WFC: Entity Declared ]
6366 * In a document without any DTD, a document with only an
6367 * internal DTD subset which contains no parameter entity
6368 * references, or a document with "standalone='yes'", the
6369 * Name given in the entity reference must match that in an
6370 * entity declaration, except that well-formed documents
6371 * need not declare any of the following entities: amp, lt,
6372 * gt, apos, quot.
6373 * The declaration of a parameter entity must precede any
6374 * reference to it.
6375 * Similarly, the declaration of a general entity must
6376 * precede any reference to it which appears in a default
6377 * value in an attribute-list declaration. Note that if
6378 * entities are declared in the external subset or in
6379 * external parameter entities, a non-validating processor
6380 * is not obligated to read and process their declarations;
6381 * for such documents, the rule that an entity must be
6382 * declared is a well-formedness constraint only if
6383 * standalone='yes'.
6384 */
6385 if (ent == NULL) {
6386 if ((ctxt->standalone == 1) ||
6387 ((ctxt->hasExternalSubset == 0) &&
6388 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006389 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006390 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006391 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006392 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006393 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006394 if ((ctxt->inSubset == 0) &&
6395 (ctxt->sax != NULL) &&
6396 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006397 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006398 }
Owen Taylor3473f882001-02-23 17:55:21 +00006399 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
6402
6403 /*
6404 * [ WFC: Parsed Entity ]
6405 * An entity reference must not contain the name of an
6406 * unparsed entity
6407 */
6408 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006409 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006410 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006411 }
6412
6413 /*
6414 * [ WFC: No External Entity References ]
6415 * Attribute values cannot contain direct or indirect
6416 * entity references to external entities.
6417 */
6418 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6419 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006420 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6421 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
6423 /*
6424 * [ WFC: No < in Attribute Values ]
6425 * The replacement text of any entity referred to directly or
6426 * indirectly in an attribute value (other than "&lt;") must
6427 * not contain a <.
6428 */
6429 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6430 (ent != NULL) &&
6431 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6432 (ent->content != NULL) &&
6433 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006434 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006435 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006436 }
6437
6438 /*
6439 * Internal check, no parameter entities here ...
6440 */
6441 else {
6442 switch (ent->etype) {
6443 case XML_INTERNAL_PARAMETER_ENTITY:
6444 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006445 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6446 "Attempt to reference the parameter entity '%s'\n",
6447 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006448 break;
6449 default:
6450 break;
6451 }
6452 }
6453
6454 /*
6455 * [ WFC: No Recursion ]
6456 * A parsed entity must not contain a recursive reference
6457 * to itself, either directly or indirectly.
6458 * Done somewhere else
6459 */
6460
6461 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006462 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 }
6465 }
6466 return(ent);
6467}
6468
6469/**
6470 * xmlParseStringEntityRef:
6471 * @ctxt: an XML parser context
6472 * @str: a pointer to an index in the string
6473 *
6474 * parse ENTITY references declarations, but this version parses it from
6475 * a string value.
6476 *
6477 * [68] EntityRef ::= '&' Name ';'
6478 *
6479 * [ WFC: Entity Declared ]
6480 * In a document without any DTD, a document with only an internal DTD
6481 * subset which contains no parameter entity references, or a document
6482 * with "standalone='yes'", the Name given in the entity reference
6483 * must match that in an entity declaration, except that well-formed
6484 * documents need not declare any of the following entities: amp, lt,
6485 * gt, apos, quot. The declaration of a parameter entity must precede
6486 * any reference to it. Similarly, the declaration of a general entity
6487 * must precede any reference to it which appears in a default value in an
6488 * attribute-list declaration. Note that if entities are declared in the
6489 * external subset or in external parameter entities, a non-validating
6490 * processor is not obligated to read and process their declarations;
6491 * for such documents, the rule that an entity must be declared is a
6492 * well-formedness constraint only if standalone='yes'.
6493 *
6494 * [ WFC: Parsed Entity ]
6495 * An entity reference must not contain the name of an unparsed entity
6496 *
6497 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6498 * is updated to the current location in the string.
6499 */
6500xmlEntityPtr
6501xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6502 xmlChar *name;
6503 const xmlChar *ptr;
6504 xmlChar cur;
6505 xmlEntityPtr ent = NULL;
6506
6507 if ((str == NULL) || (*str == NULL))
6508 return(NULL);
6509 ptr = *str;
6510 cur = *ptr;
6511 if (cur == '&') {
6512 ptr++;
6513 cur = *ptr;
6514 name = xmlParseStringName(ctxt, &ptr);
6515 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6517 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006518 } else {
6519 if (*ptr == ';') {
6520 ptr++;
6521 /*
6522 * Ask first SAX for entity resolution, otherwise try the
6523 * predefined set.
6524 */
6525 if (ctxt->sax != NULL) {
6526 if (ctxt->sax->getEntity != NULL)
6527 ent = ctxt->sax->getEntity(ctxt->userData, name);
6528 if (ent == NULL)
6529 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006530 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006531 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006532 }
Owen Taylor3473f882001-02-23 17:55:21 +00006533 }
6534 /*
6535 * [ WFC: Entity Declared ]
6536 * In a document without any DTD, a document with only an
6537 * internal DTD subset which contains no parameter entity
6538 * references, or a document with "standalone='yes'", the
6539 * Name given in the entity reference must match that in an
6540 * entity declaration, except that well-formed documents
6541 * need not declare any of the following entities: amp, lt,
6542 * gt, apos, quot.
6543 * The declaration of a parameter entity must precede any
6544 * reference to it.
6545 * Similarly, the declaration of a general entity must
6546 * precede any reference to it which appears in a default
6547 * value in an attribute-list declaration. Note that if
6548 * entities are declared in the external subset or in
6549 * external parameter entities, a non-validating processor
6550 * is not obligated to read and process their declarations;
6551 * for such documents, the rule that an entity must be
6552 * declared is a well-formedness constraint only if
6553 * standalone='yes'.
6554 */
6555 if (ent == NULL) {
6556 if ((ctxt->standalone == 1) ||
6557 ((ctxt->hasExternalSubset == 0) &&
6558 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006559 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006560 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006563 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006564 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006566 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568
6569 /*
6570 * [ WFC: Parsed Entity ]
6571 * An entity reference must not contain the name of an
6572 * unparsed entity
6573 */
6574 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006575 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006576 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 }
6578
6579 /*
6580 * [ WFC: No External Entity References ]
6581 * Attribute values cannot contain direct or indirect
6582 * entity references to external entities.
6583 */
6584 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006586 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006587 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589 /*
6590 * [ WFC: No < in Attribute Values ]
6591 * The replacement text of any entity referred to directly or
6592 * indirectly in an attribute value (other than "&lt;") must
6593 * not contain a <.
6594 */
6595 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6596 (ent != NULL) &&
6597 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6598 (ent->content != NULL) &&
6599 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006600 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6601 "'<' in entity '%s' is not allowed in attributes values\n",
6602 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604
6605 /*
6606 * Internal check, no parameter entities here ...
6607 */
6608 else {
6609 switch (ent->etype) {
6610 case XML_INTERNAL_PARAMETER_ENTITY:
6611 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006612 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6613 "Attempt to reference the parameter entity '%s'\n",
6614 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006615 break;
6616 default:
6617 break;
6618 }
6619 }
6620
6621 /*
6622 * [ WFC: No Recursion ]
6623 * A parsed entity must not contain a recursive reference
6624 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006625 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006626 */
6627
6628 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006629 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006630 }
6631 xmlFree(name);
6632 }
6633 }
6634 *str = ptr;
6635 return(ent);
6636}
6637
6638/**
6639 * xmlParsePEReference:
6640 * @ctxt: an XML parser context
6641 *
6642 * parse PEReference declarations
6643 * The entity content is handled directly by pushing it's content as
6644 * a new input stream.
6645 *
6646 * [69] PEReference ::= '%' Name ';'
6647 *
6648 * [ WFC: No Recursion ]
6649 * A parsed entity must not contain a recursive
6650 * reference to itself, either directly or indirectly.
6651 *
6652 * [ WFC: Entity Declared ]
6653 * In a document without any DTD, a document with only an internal DTD
6654 * subset which contains no parameter entity references, or a document
6655 * with "standalone='yes'", ... ... The declaration of a parameter
6656 * entity must precede any reference to it...
6657 *
6658 * [ VC: Entity Declared ]
6659 * In a document with an external subset or external parameter entities
6660 * with "standalone='no'", ... ... The declaration of a parameter entity
6661 * must precede any reference to it...
6662 *
6663 * [ WFC: In DTD ]
6664 * Parameter-entity references may only appear in the DTD.
6665 * NOTE: misleading but this is handled.
6666 */
6667void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006668xmlParsePEReference(xmlParserCtxtPtr ctxt)
6669{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006670 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006671 xmlEntityPtr entity = NULL;
6672 xmlParserInputPtr input;
6673
6674 if (RAW == '%') {
6675 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006676 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006677 if (name == NULL) {
6678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6679 "xmlParsePEReference: no name\n");
6680 } else {
6681 if (RAW == ';') {
6682 NEXT;
6683 if ((ctxt->sax != NULL) &&
6684 (ctxt->sax->getParameterEntity != NULL))
6685 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6686 name);
6687 if (entity == NULL) {
6688 /*
6689 * [ WFC: Entity Declared ]
6690 * In a document without any DTD, a document with only an
6691 * internal DTD subset which contains no parameter entity
6692 * references, or a document with "standalone='yes'", ...
6693 * ... The declaration of a parameter entity must precede
6694 * any reference to it...
6695 */
6696 if ((ctxt->standalone == 1) ||
6697 ((ctxt->hasExternalSubset == 0) &&
6698 (ctxt->hasPErefs == 0))) {
6699 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6700 "PEReference: %%%s; not found\n",
6701 name);
6702 } else {
6703 /*
6704 * [ VC: Entity Declared ]
6705 * In a document with an external subset or external
6706 * parameter entities with "standalone='no'", ...
6707 * ... The declaration of a parameter entity must
6708 * precede any reference to it...
6709 */
6710 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6711 "PEReference: %%%s; not found\n",
6712 name, NULL);
6713 ctxt->valid = 0;
6714 }
6715 } else {
6716 /*
6717 * Internal checking in case the entity quest barfed
6718 */
6719 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6720 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6721 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6722 "Internal: %%%s; is not a parameter entity\n",
6723 name, NULL);
6724 } else if (ctxt->input->free != deallocblankswrapper) {
6725 input =
6726 xmlNewBlanksWrapperInputStream(ctxt, entity);
6727 xmlPushInput(ctxt, input);
6728 } else {
6729 /*
6730 * TODO !!!
6731 * handle the extra spaces added before and after
6732 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6733 */
6734 input = xmlNewEntityInputStream(ctxt, entity);
6735 xmlPushInput(ctxt, input);
6736 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006737 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006738 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006739 xmlParseTextDecl(ctxt);
6740 if (ctxt->errNo ==
6741 XML_ERR_UNSUPPORTED_ENCODING) {
6742 /*
6743 * The XML REC instructs us to stop parsing
6744 * right here
6745 */
6746 ctxt->instate = XML_PARSER_EOF;
6747 return;
6748 }
6749 }
6750 }
6751 }
6752 ctxt->hasPErefs = 1;
6753 } else {
6754 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6755 }
6756 }
Owen Taylor3473f882001-02-23 17:55:21 +00006757 }
6758}
6759
6760/**
6761 * xmlParseStringPEReference:
6762 * @ctxt: an XML parser context
6763 * @str: a pointer to an index in the string
6764 *
6765 * parse PEReference declarations
6766 *
6767 * [69] PEReference ::= '%' Name ';'
6768 *
6769 * [ WFC: No Recursion ]
6770 * A parsed entity must not contain a recursive
6771 * reference to itself, either directly or indirectly.
6772 *
6773 * [ WFC: Entity Declared ]
6774 * In a document without any DTD, a document with only an internal DTD
6775 * subset which contains no parameter entity references, or a document
6776 * with "standalone='yes'", ... ... The declaration of a parameter
6777 * entity must precede any reference to it...
6778 *
6779 * [ VC: Entity Declared ]
6780 * In a document with an external subset or external parameter entities
6781 * with "standalone='no'", ... ... The declaration of a parameter entity
6782 * must precede any reference to it...
6783 *
6784 * [ WFC: In DTD ]
6785 * Parameter-entity references may only appear in the DTD.
6786 * NOTE: misleading but this is handled.
6787 *
6788 * Returns the string of the entity content.
6789 * str is updated to the current value of the index
6790 */
6791xmlEntityPtr
6792xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6793 const xmlChar *ptr;
6794 xmlChar cur;
6795 xmlChar *name;
6796 xmlEntityPtr entity = NULL;
6797
6798 if ((str == NULL) || (*str == NULL)) return(NULL);
6799 ptr = *str;
6800 cur = *ptr;
6801 if (cur == '%') {
6802 ptr++;
6803 cur = *ptr;
6804 name = xmlParseStringName(ctxt, &ptr);
6805 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6807 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006808 } else {
6809 cur = *ptr;
6810 if (cur == ';') {
6811 ptr++;
6812 cur = *ptr;
6813 if ((ctxt->sax != NULL) &&
6814 (ctxt->sax->getParameterEntity != NULL))
6815 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6816 name);
6817 if (entity == NULL) {
6818 /*
6819 * [ WFC: Entity Declared ]
6820 * In a document without any DTD, a document with only an
6821 * internal DTD subset which contains no parameter entity
6822 * references, or a document with "standalone='yes'", ...
6823 * ... The declaration of a parameter entity must precede
6824 * any reference to it...
6825 */
6826 if ((ctxt->standalone == 1) ||
6827 ((ctxt->hasExternalSubset == 0) &&
6828 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006829 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006830 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 } else {
6832 /*
6833 * [ VC: Entity Declared ]
6834 * In a document with an external subset or external
6835 * parameter entities with "standalone='no'", ...
6836 * ... The declaration of a parameter entity must
6837 * precede any reference to it...
6838 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6840 "PEReference: %%%s; not found\n",
6841 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006842 ctxt->valid = 0;
6843 }
6844 } else {
6845 /*
6846 * Internal checking in case the entity quest barfed
6847 */
6848 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6849 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006850 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6851 "%%%s; is not a parameter entity\n",
6852 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006853 }
6854 }
6855 ctxt->hasPErefs = 1;
6856 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006857 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859 xmlFree(name);
6860 }
6861 }
6862 *str = ptr;
6863 return(entity);
6864}
6865
6866/**
6867 * xmlParseDocTypeDecl:
6868 * @ctxt: an XML parser context
6869 *
6870 * parse a DOCTYPE declaration
6871 *
6872 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6873 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6874 *
6875 * [ VC: Root Element Type ]
6876 * The Name in the document type declaration must match the element
6877 * type of the root element.
6878 */
6879
6880void
6881xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006882 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 xmlChar *ExternalID = NULL;
6884 xmlChar *URI = NULL;
6885
6886 /*
6887 * We know that '<!DOCTYPE' has been detected.
6888 */
6889 SKIP(9);
6890
6891 SKIP_BLANKS;
6892
6893 /*
6894 * Parse the DOCTYPE name.
6895 */
6896 name = xmlParseName(ctxt);
6897 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6899 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006900 }
6901 ctxt->intSubName = name;
6902
6903 SKIP_BLANKS;
6904
6905 /*
6906 * Check for SystemID and ExternalID
6907 */
6908 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6909
6910 if ((URI != NULL) || (ExternalID != NULL)) {
6911 ctxt->hasExternalSubset = 1;
6912 }
6913 ctxt->extSubURI = URI;
6914 ctxt->extSubSystem = ExternalID;
6915
6916 SKIP_BLANKS;
6917
6918 /*
6919 * Create and update the internal subset.
6920 */
6921 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6922 (!ctxt->disableSAX))
6923 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6924
6925 /*
6926 * Is there any internal subset declarations ?
6927 * they are handled separately in xmlParseInternalSubset()
6928 */
6929 if (RAW == '[')
6930 return;
6931
6932 /*
6933 * We should be at the end of the DOCTYPE declaration.
6934 */
6935 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006936 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006937 }
6938 NEXT;
6939}
6940
6941/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006942 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006943 * @ctxt: an XML parser context
6944 *
6945 * parse the internal subset declaration
6946 *
6947 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6948 */
6949
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006950static void
Owen Taylor3473f882001-02-23 17:55:21 +00006951xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6952 /*
6953 * Is there any DTD definition ?
6954 */
6955 if (RAW == '[') {
6956 ctxt->instate = XML_PARSER_DTD;
6957 NEXT;
6958 /*
6959 * Parse the succession of Markup declarations and
6960 * PEReferences.
6961 * Subsequence (markupdecl | PEReference | S)*
6962 */
6963 while (RAW != ']') {
6964 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006965 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006966
6967 SKIP_BLANKS;
6968 xmlParseMarkupDecl(ctxt);
6969 xmlParsePEReference(ctxt);
6970
6971 /*
6972 * Pop-up of finished entities.
6973 */
6974 while ((RAW == 0) && (ctxt->inputNr > 1))
6975 xmlPopInput(ctxt);
6976
6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006978 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006979 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006980 break;
6981 }
6982 }
6983 if (RAW == ']') {
6984 NEXT;
6985 SKIP_BLANKS;
6986 }
6987 }
6988
6989 /*
6990 * We should be at the end of the DOCTYPE declaration.
6991 */
6992 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006993 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006994 }
6995 NEXT;
6996}
6997
Daniel Veillard81273902003-09-30 00:43:48 +00006998#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006999/**
7000 * xmlParseAttribute:
7001 * @ctxt: an XML parser context
7002 * @value: a xmlChar ** used to store the value of the attribute
7003 *
7004 * parse an attribute
7005 *
7006 * [41] Attribute ::= Name Eq AttValue
7007 *
7008 * [ WFC: No External Entity References ]
7009 * Attribute values cannot contain direct or indirect entity references
7010 * to external entities.
7011 *
7012 * [ WFC: No < in Attribute Values ]
7013 * The replacement text of any entity referred to directly or indirectly in
7014 * an attribute value (other than "&lt;") must not contain a <.
7015 *
7016 * [ VC: Attribute Value Type ]
7017 * The attribute must have been declared; the value must be of the type
7018 * declared for it.
7019 *
7020 * [25] Eq ::= S? '=' S?
7021 *
7022 * With namespace:
7023 *
7024 * [NS 11] Attribute ::= QName Eq AttValue
7025 *
7026 * Also the case QName == xmlns:??? is handled independently as a namespace
7027 * definition.
7028 *
7029 * Returns the attribute name, and the value in *value.
7030 */
7031
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007032const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007033xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007034 const xmlChar *name;
7035 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007036
7037 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007038 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007039 name = xmlParseName(ctxt);
7040 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007042 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007043 return(NULL);
7044 }
7045
7046 /*
7047 * read the value
7048 */
7049 SKIP_BLANKS;
7050 if (RAW == '=') {
7051 NEXT;
7052 SKIP_BLANKS;
7053 val = xmlParseAttValue(ctxt);
7054 ctxt->instate = XML_PARSER_CONTENT;
7055 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007056 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007057 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007058 return(NULL);
7059 }
7060
7061 /*
7062 * Check that xml:lang conforms to the specification
7063 * No more registered as an error, just generate a warning now
7064 * since this was deprecated in XML second edition
7065 */
7066 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7067 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007068 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7069 "Malformed value for xml:lang : %s\n",
7070 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007071 }
7072 }
7073
7074 /*
7075 * Check that xml:space conforms to the specification
7076 */
7077 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7078 if (xmlStrEqual(val, BAD_CAST "default"))
7079 *(ctxt->space) = 0;
7080 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7081 *(ctxt->space) = 1;
7082 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007083 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007084"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007085 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007086 }
7087 }
7088
7089 *value = val;
7090 return(name);
7091}
7092
7093/**
7094 * xmlParseStartTag:
7095 * @ctxt: an XML parser context
7096 *
7097 * parse a start of tag either for rule element or
7098 * EmptyElement. In both case we don't parse the tag closing chars.
7099 *
7100 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7101 *
7102 * [ WFC: Unique Att Spec ]
7103 * No attribute name may appear more than once in the same start-tag or
7104 * empty-element tag.
7105 *
7106 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7107 *
7108 * [ WFC: Unique Att Spec ]
7109 * No attribute name may appear more than once in the same start-tag or
7110 * empty-element tag.
7111 *
7112 * With namespace:
7113 *
7114 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7115 *
7116 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7117 *
7118 * Returns the element name parsed
7119 */
7120
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007121const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007122xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007123 const xmlChar *name;
7124 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007125 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007126 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007128 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007129 int i;
7130
7131 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007132 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007133
7134 name = xmlParseName(ctxt);
7135 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007137 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007138 return(NULL);
7139 }
7140
7141 /*
7142 * Now parse the attributes, it ends up with the ending
7143 *
7144 * (S Attribute)* S?
7145 */
7146 SKIP_BLANKS;
7147 GROW;
7148
Daniel Veillard21a0f912001-02-25 19:54:14 +00007149 while ((RAW != '>') &&
7150 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007151 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007152 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007153 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007154
7155 attname = xmlParseAttribute(ctxt, &attvalue);
7156 if ((attname != NULL) && (attvalue != NULL)) {
7157 /*
7158 * [ WFC: Unique Att Spec ]
7159 * No attribute name may appear more than once in the same
7160 * start-tag or empty-element tag.
7161 */
7162 for (i = 0; i < nbatts;i += 2) {
7163 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007164 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 xmlFree(attvalue);
7166 goto failed;
7167 }
7168 }
Owen Taylor3473f882001-02-23 17:55:21 +00007169 /*
7170 * Add the pair to atts
7171 */
7172 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007173 maxatts = 22; /* allow for 10 attrs by default */
7174 atts = (const xmlChar **)
7175 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007176 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007177 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007178 if (attvalue != NULL)
7179 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007180 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007181 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007182 ctxt->atts = atts;
7183 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007184 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007185 const xmlChar **n;
7186
Owen Taylor3473f882001-02-23 17:55:21 +00007187 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007188 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007189 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007190 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007191 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 if (attvalue != NULL)
7193 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007194 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007195 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007196 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007197 ctxt->atts = atts;
7198 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007199 }
7200 atts[nbatts++] = attname;
7201 atts[nbatts++] = attvalue;
7202 atts[nbatts] = NULL;
7203 atts[nbatts + 1] = NULL;
7204 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007205 if (attvalue != NULL)
7206 xmlFree(attvalue);
7207 }
7208
7209failed:
7210
Daniel Veillard3772de32002-12-17 10:31:45 +00007211 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007212 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7213 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007214 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7216 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007217 }
7218 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007219 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7220 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007221 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7222 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007223 break;
7224 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007225 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007226 GROW;
7227 }
7228
7229 /*
7230 * SAX: Start of Element !
7231 */
7232 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007233 (!ctxt->disableSAX)) {
7234 if (nbatts > 0)
7235 ctxt->sax->startElement(ctxt->userData, name, atts);
7236 else
7237 ctxt->sax->startElement(ctxt->userData, name, NULL);
7238 }
Owen Taylor3473f882001-02-23 17:55:21 +00007239
7240 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007241 /* Free only the content strings */
7242 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007243 if (atts[i] != NULL)
7244 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007245 }
7246 return(name);
7247}
7248
7249/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007251 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252 * @line: line of the start tag
7253 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007254 *
7255 * parse an end of tag
7256 *
7257 * [42] ETag ::= '</' Name S? '>'
7258 *
7259 * With namespace
7260 *
7261 * [NS 9] ETag ::= '</' QName S? '>'
7262 */
7263
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007264static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007265xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007266 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007267
7268 GROW;
7269 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007270 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007271 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007272 return;
7273 }
7274 SKIP(2);
7275
Daniel Veillard46de64e2002-05-29 08:21:33 +00007276 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007277
7278 /*
7279 * We should definitely be at the ending "S? '>'" part
7280 */
7281 GROW;
7282 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007283 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007284 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007285 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007286 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007287
7288 /*
7289 * [ WFC: Element Type Match ]
7290 * The Name in an element's end-tag must match the element type in the
7291 * start-tag.
7292 *
7293 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007294 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007295 if (name == NULL) name = BAD_CAST "unparseable";
7296 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007297 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007298 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007299 }
7300
7301 /*
7302 * SAX: End of Tag
7303 */
7304 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7305 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007306 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007307
Daniel Veillarde57ec792003-09-10 10:50:59 +00007308 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007309 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007310 return;
7311}
7312
7313/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007314 * xmlParseEndTag:
7315 * @ctxt: an XML parser context
7316 *
7317 * parse an end of tag
7318 *
7319 * [42] ETag ::= '</' Name S? '>'
7320 *
7321 * With namespace
7322 *
7323 * [NS 9] ETag ::= '</' QName S? '>'
7324 */
7325
7326void
7327xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 xmlParseEndTag1(ctxt, 0);
7329}
Daniel Veillard81273902003-09-30 00:43:48 +00007330#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331
7332/************************************************************************
7333 * *
7334 * SAX 2 specific operations *
7335 * *
7336 ************************************************************************/
7337
7338static const xmlChar *
7339xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7340 int len = 0, l;
7341 int c;
7342 int count = 0;
7343
7344 /*
7345 * Handler for more complex cases
7346 */
7347 GROW;
7348 c = CUR_CHAR(l);
7349 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007350 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007351 return(NULL);
7352 }
7353
7354 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007355 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007356 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007357 (IS_COMBINING(c)) ||
7358 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007359 if (count++ > 100) {
7360 count = 0;
7361 GROW;
7362 }
7363 len += l;
7364 NEXTL(l);
7365 c = CUR_CHAR(l);
7366 }
7367 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7368}
7369
7370/*
7371 * xmlGetNamespace:
7372 * @ctxt: an XML parser context
7373 * @prefix: the prefix to lookup
7374 *
7375 * Lookup the namespace name for the @prefix (which ca be NULL)
7376 * The prefix must come from the @ctxt->dict dictionnary
7377 *
7378 * Returns the namespace name or NULL if not bound
7379 */
7380static const xmlChar *
7381xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7382 int i;
7383
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007385 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 if (ctxt->nsTab[i] == prefix) {
7387 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7388 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007390 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 return(NULL);
7392}
7393
7394/**
7395 * xmlParseNCName:
7396 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007397 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 *
7399 * parse an XML name.
7400 *
7401 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7402 * CombiningChar | Extender
7403 *
7404 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7405 *
7406 * Returns the Name parsed or NULL
7407 */
7408
7409static const xmlChar *
7410xmlParseNCName(xmlParserCtxtPtr ctxt) {
7411 const xmlChar *in;
7412 const xmlChar *ret;
7413 int count = 0;
7414
7415 /*
7416 * Accelerator for simple ASCII names
7417 */
7418 in = ctxt->input->cur;
7419 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7420 ((*in >= 0x41) && (*in <= 0x5A)) ||
7421 (*in == '_')) {
7422 in++;
7423 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7424 ((*in >= 0x41) && (*in <= 0x5A)) ||
7425 ((*in >= 0x30) && (*in <= 0x39)) ||
7426 (*in == '_') || (*in == '-') ||
7427 (*in == '.'))
7428 in++;
7429 if ((*in > 0) && (*in < 0x80)) {
7430 count = in - ctxt->input->cur;
7431 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7432 ctxt->input->cur = in;
7433 ctxt->nbChars += count;
7434 ctxt->input->col += count;
7435 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007436 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007437 }
7438 return(ret);
7439 }
7440 }
7441 return(xmlParseNCNameComplex(ctxt));
7442}
7443
7444/**
7445 * xmlParseQName:
7446 * @ctxt: an XML parser context
7447 * @prefix: pointer to store the prefix part
7448 *
7449 * parse an XML Namespace QName
7450 *
7451 * [6] QName ::= (Prefix ':')? LocalPart
7452 * [7] Prefix ::= NCName
7453 * [8] LocalPart ::= NCName
7454 *
7455 * Returns the Name parsed or NULL
7456 */
7457
7458static const xmlChar *
7459xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7460 const xmlChar *l, *p;
7461
7462 GROW;
7463
7464 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007465 if (l == NULL) {
7466 if (CUR == ':') {
7467 l = xmlParseName(ctxt);
7468 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7470 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007471 *prefix = NULL;
7472 return(l);
7473 }
7474 }
7475 return(NULL);
7476 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477 if (CUR == ':') {
7478 NEXT;
7479 p = l;
7480 l = xmlParseNCName(ctxt);
7481 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007482 xmlChar *tmp;
7483
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007484 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7485 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007486 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7487 p = xmlDictLookup(ctxt->dict, tmp, -1);
7488 if (tmp != NULL) xmlFree(tmp);
7489 *prefix = NULL;
7490 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007491 }
7492 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007493 xmlChar *tmp;
7494
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007495 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7496 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007497 NEXT;
7498 tmp = (xmlChar *) xmlParseName(ctxt);
7499 if (tmp != NULL) {
7500 tmp = xmlBuildQName(tmp, l, NULL, 0);
7501 l = xmlDictLookup(ctxt->dict, tmp, -1);
7502 if (tmp != NULL) xmlFree(tmp);
7503 *prefix = p;
7504 return(l);
7505 }
7506 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7507 l = xmlDictLookup(ctxt->dict, tmp, -1);
7508 if (tmp != NULL) xmlFree(tmp);
7509 *prefix = p;
7510 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511 }
7512 *prefix = p;
7513 } else
7514 *prefix = NULL;
7515 return(l);
7516}
7517
7518/**
7519 * xmlParseQNameAndCompare:
7520 * @ctxt: an XML parser context
7521 * @name: the localname
7522 * @prefix: the prefix, if any.
7523 *
7524 * parse an XML name and compares for match
7525 * (specialized for endtag parsing)
7526 *
7527 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7528 * and the name for mismatch
7529 */
7530
7531static const xmlChar *
7532xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7533 xmlChar const *prefix) {
7534 const xmlChar *cmp = name;
7535 const xmlChar *in;
7536 const xmlChar *ret;
7537 const xmlChar *prefix2;
7538
7539 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7540
7541 GROW;
7542 in = ctxt->input->cur;
7543
7544 cmp = prefix;
7545 while (*in != 0 && *in == *cmp) {
7546 ++in;
7547 ++cmp;
7548 }
7549 if ((*cmp == 0) && (*in == ':')) {
7550 in++;
7551 cmp = name;
7552 while (*in != 0 && *in == *cmp) {
7553 ++in;
7554 ++cmp;
7555 }
William M. Brack76e95df2003-10-18 16:20:14 +00007556 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 /* success */
7558 ctxt->input->cur = in;
7559 return((const xmlChar*) 1);
7560 }
7561 }
7562 /*
7563 * all strings coms from the dictionary, equality can be done directly
7564 */
7565 ret = xmlParseQName (ctxt, &prefix2);
7566 if ((ret == name) && (prefix == prefix2))
7567 return((const xmlChar*) 1);
7568 return ret;
7569}
7570
7571/**
7572 * xmlParseAttValueInternal:
7573 * @ctxt: an XML parser context
7574 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007575 * @alloc: whether the attribute was reallocated as a new string
7576 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577 *
7578 * parse a value for an attribute.
7579 * NOTE: if no normalization is needed, the routine will return pointers
7580 * directly from the data buffer.
7581 *
7582 * 3.3.3 Attribute-Value Normalization:
7583 * Before the value of an attribute is passed to the application or
7584 * checked for validity, the XML processor must normalize it as follows:
7585 * - a character reference is processed by appending the referenced
7586 * character to the attribute value
7587 * - an entity reference is processed by recursively processing the
7588 * replacement text of the entity
7589 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7590 * appending #x20 to the normalized value, except that only a single
7591 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7592 * parsed entity or the literal entity value of an internal parsed entity
7593 * - other characters are processed by appending them to the normalized value
7594 * If the declared value is not CDATA, then the XML processor must further
7595 * process the normalized attribute value by discarding any leading and
7596 * trailing space (#x20) characters, and by replacing sequences of space
7597 * (#x20) characters by a single space (#x20) character.
7598 * All attributes for which no declaration has been read should be treated
7599 * by a non-validating parser as if declared CDATA.
7600 *
7601 * Returns the AttValue parsed or NULL. The value has to be freed by the
7602 * caller if it was copied, this can be detected by val[*len] == 0.
7603 */
7604
7605static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007606xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7607 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007608{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007610 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 xmlChar *ret = NULL;
7612
7613 GROW;
7614 in = (xmlChar *) CUR_PTR;
7615 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007616 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007617 return (NULL);
7618 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007619 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007620
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007621 /*
7622 * try to handle in this routine the most common case where no
7623 * allocation of a new string is required and where content is
7624 * pure ASCII.
7625 */
7626 limit = *in++;
7627 end = ctxt->input->end;
7628 start = in;
7629 if (in >= end) {
7630 const xmlChar *oldbase = ctxt->input->base;
7631 GROW;
7632 if (oldbase != ctxt->input->base) {
7633 long delta = ctxt->input->base - oldbase;
7634 start = start + delta;
7635 in = in + delta;
7636 }
7637 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007639 if (normalize) {
7640 /*
7641 * Skip any leading spaces
7642 */
7643 while ((in < end) && (*in != limit) &&
7644 ((*in == 0x20) || (*in == 0x9) ||
7645 (*in == 0xA) || (*in == 0xD))) {
7646 in++;
7647 start = in;
7648 if (in >= end) {
7649 const xmlChar *oldbase = ctxt->input->base;
7650 GROW;
7651 if (oldbase != ctxt->input->base) {
7652 long delta = ctxt->input->base - oldbase;
7653 start = start + delta;
7654 in = in + delta;
7655 }
7656 end = ctxt->input->end;
7657 }
7658 }
7659 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7660 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7661 if ((*in++ == 0x20) && (*in == 0x20)) break;
7662 if (in >= end) {
7663 const xmlChar *oldbase = ctxt->input->base;
7664 GROW;
7665 if (oldbase != ctxt->input->base) {
7666 long delta = ctxt->input->base - oldbase;
7667 start = start + delta;
7668 in = in + delta;
7669 }
7670 end = ctxt->input->end;
7671 }
7672 }
7673 last = in;
7674 /*
7675 * skip the trailing blanks
7676 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007677 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007678 while ((in < end) && (*in != limit) &&
7679 ((*in == 0x20) || (*in == 0x9) ||
7680 (*in == 0xA) || (*in == 0xD))) {
7681 in++;
7682 if (in >= end) {
7683 const xmlChar *oldbase = ctxt->input->base;
7684 GROW;
7685 if (oldbase != ctxt->input->base) {
7686 long delta = ctxt->input->base - oldbase;
7687 start = start + delta;
7688 in = in + delta;
7689 last = last + delta;
7690 }
7691 end = ctxt->input->end;
7692 }
7693 }
7694 if (*in != limit) goto need_complex;
7695 } else {
7696 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7697 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7698 in++;
7699 if (in >= end) {
7700 const xmlChar *oldbase = ctxt->input->base;
7701 GROW;
7702 if (oldbase != ctxt->input->base) {
7703 long delta = ctxt->input->base - oldbase;
7704 start = start + delta;
7705 in = in + delta;
7706 }
7707 end = ctxt->input->end;
7708 }
7709 }
7710 last = in;
7711 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007713 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007715 *len = last - start;
7716 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007718 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007719 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 }
7721 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007722 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007724need_complex:
7725 if (alloc) *alloc = 1;
7726 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727}
7728
7729/**
7730 * xmlParseAttribute2:
7731 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007732 * @pref: the element prefix
7733 * @elem: the element name
7734 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007736 * @len: an int * to save the length of the attribute
7737 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007738 *
7739 * parse an attribute in the new SAX2 framework.
7740 *
7741 * Returns the attribute name, and the value in *value, .
7742 */
7743
7744static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007745xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7746 const xmlChar *pref, const xmlChar *elem,
7747 const xmlChar **prefix, xmlChar **value,
7748 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007750 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007751 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007752
7753 *value = NULL;
7754 GROW;
7755 name = xmlParseQName(ctxt, prefix);
7756 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7758 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759 return(NULL);
7760 }
7761
7762 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007763 * get the type if needed
7764 */
7765 if (ctxt->attsSpecial != NULL) {
7766 int type;
7767
7768 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7769 pref, elem, *prefix, name);
7770 if (type != 0) normalize = 1;
7771 }
7772
7773 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 * read the value
7775 */
7776 SKIP_BLANKS;
7777 if (RAW == '=') {
7778 NEXT;
7779 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007780 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 ctxt->instate = XML_PARSER_CONTENT;
7782 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007783 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 return(NULL);
7786 }
7787
Daniel Veillardd8925572005-06-08 22:34:55 +00007788 if (*prefix == ctxt->str_xml) {
7789 /*
7790 * Check that xml:lang conforms to the specification
7791 * No more registered as an error, just generate a warning now
7792 * since this was deprecated in XML second edition
7793 */
7794 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7795 internal_val = xmlStrndup(val, *len);
7796 if (!xmlCheckLanguageID(internal_val)) {
7797 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7798 "Malformed value for xml:lang : %s\n",
7799 internal_val, NULL);
7800 }
7801 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007802
Daniel Veillardd8925572005-06-08 22:34:55 +00007803 /*
7804 * Check that xml:space conforms to the specification
7805 */
7806 if (xmlStrEqual(name, BAD_CAST "space")) {
7807 internal_val = xmlStrndup(val, *len);
7808 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7809 *(ctxt->space) = 0;
7810 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7811 *(ctxt->space) = 1;
7812 else {
7813 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007814"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007815 internal_val, NULL);
7816 }
7817 }
7818 if (internal_val) {
7819 xmlFree(internal_val);
7820 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822
7823 *value = val;
7824 return(name);
7825}
7826
7827/**
7828 * xmlParseStartTag2:
7829 * @ctxt: an XML parser context
7830 *
7831 * parse a start of tag either for rule element or
7832 * EmptyElement. In both case we don't parse the tag closing chars.
7833 * This routine is called when running SAX2 parsing
7834 *
7835 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7836 *
7837 * [ WFC: Unique Att Spec ]
7838 * No attribute name may appear more than once in the same start-tag or
7839 * empty-element tag.
7840 *
7841 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7842 *
7843 * [ WFC: Unique Att Spec ]
7844 * No attribute name may appear more than once in the same start-tag or
7845 * empty-element tag.
7846 *
7847 * With namespace:
7848 *
7849 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7850 *
7851 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7852 *
7853 * Returns the element name parsed
7854 */
7855
7856static const xmlChar *
7857xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007858 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 const xmlChar *localname;
7860 const xmlChar *prefix;
7861 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007862 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 const xmlChar *nsname;
7864 xmlChar *attvalue;
7865 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007868 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007869 const xmlChar *base;
7870 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007871 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872
7873 if (RAW != '<') return(NULL);
7874 NEXT1;
7875
7876 /*
7877 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7878 * point since the attribute values may be stored as pointers to
7879 * the buffer and calling SHRINK would destroy them !
7880 * The Shrinking is only possible once the full set of attribute
7881 * callbacks have been done.
7882 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007884 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007885 base = ctxt->input->base;
7886 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007887 oldline = ctxt->input->line;
7888 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007889 nbatts = 0;
7890 nratts = 0;
7891 nbdef = 0;
7892 nbNs = 0;
7893 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007894 /* Forget any namespaces added during an earlier parse of this element. */
7895 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007896
7897 localname = xmlParseQName(ctxt, &prefix);
7898 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7900 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007901 return(NULL);
7902 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007903 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007904
7905 /*
7906 * Now parse the attributes, it ends up with the ending
7907 *
7908 * (S Attribute)* S?
7909 */
7910 SKIP_BLANKS;
7911 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007912 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913
7914 while ((RAW != '>') &&
7915 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007916 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007917 const xmlChar *q = CUR_PTR;
7918 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007919 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007921 attname = xmlParseAttribute2(ctxt, prefix, localname,
7922 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007923 if (ctxt->input->base != base) {
7924 if ((attvalue != NULL) && (alloc != 0))
7925 xmlFree(attvalue);
7926 attvalue = NULL;
7927 goto base_changed;
7928 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 if ((attname != NULL) && (attvalue != NULL)) {
7930 if (len < 0) len = xmlStrlen(attvalue);
7931 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007932 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7933 xmlURIPtr uri;
7934
7935 if (*URL != 0) {
7936 uri = xmlParseURI((const char *) URL);
7937 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007938 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7939 "xmlns: %s not a valid URI\n",
7940 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007941 } else {
7942 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007943 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7944 "xmlns: URI %s is not absolute\n",
7945 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007946 }
7947 xmlFreeURI(uri);
7948 }
7949 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007951 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007952 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007953 for (j = 1;j <= nbNs;j++)
7954 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7955 break;
7956 if (j <= nbNs)
7957 xmlErrAttributeDup(ctxt, NULL, attname);
7958 else
7959 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007960 if (alloc != 0) xmlFree(attvalue);
7961 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 continue;
7963 }
7964 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007965 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7966 xmlURIPtr uri;
7967
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007968 if (attname == ctxt->str_xml) {
7969 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007970 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7971 "xml namespace prefix mapped to wrong URI\n",
7972 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007973 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007974 /*
7975 * Do not keep a namespace definition node
7976 */
7977 if (alloc != 0) xmlFree(attvalue);
7978 SKIP_BLANKS;
7979 continue;
7980 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007981 uri = xmlParseURI((const char *) URL);
7982 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007983 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7984 "xmlns:%s: '%s' is not a valid URI\n",
7985 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007986 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007987 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007988 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7989 "xmlns:%s: URI %s is not absolute\n",
7990 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007991 }
7992 xmlFreeURI(uri);
7993 }
7994
Daniel Veillard0fb18932003-09-07 09:14:37 +00007995 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007996 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007997 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007998 for (j = 1;j <= nbNs;j++)
7999 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8000 break;
8001 if (j <= nbNs)
8002 xmlErrAttributeDup(ctxt, aprefix, attname);
8003 else
8004 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008005 if (alloc != 0) xmlFree(attvalue);
8006 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008007 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008008 continue;
8009 }
8010
8011 /*
8012 * Add the pair to atts
8013 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008014 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8015 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 if (attvalue[len] == 0)
8017 xmlFree(attvalue);
8018 goto failed;
8019 }
8020 maxatts = ctxt->maxatts;
8021 atts = ctxt->atts;
8022 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008023 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008024 atts[nbatts++] = attname;
8025 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008026 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008027 atts[nbatts++] = attvalue;
8028 attvalue += len;
8029 atts[nbatts++] = attvalue;
8030 /*
8031 * tag if some deallocation is needed
8032 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008033 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 } else {
8035 if ((attvalue != NULL) && (attvalue[len] == 0))
8036 xmlFree(attvalue);
8037 }
8038
8039failed:
8040
8041 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008042 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008043 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8044 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008045 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8047 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008048 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 }
8050 SKIP_BLANKS;
8051 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8052 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008053 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008054 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008055 break;
8056 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008057 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008058 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008059 }
8060
Daniel Veillard0fb18932003-09-07 09:14:37 +00008061 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008062 * The attributes defaulting
8063 */
8064 if (ctxt->attsDefault != NULL) {
8065 xmlDefAttrsPtr defaults;
8066
8067 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8068 if (defaults != NULL) {
8069 for (i = 0;i < defaults->nbAttrs;i++) {
8070 attname = defaults->values[4 * i];
8071 aprefix = defaults->values[4 * i + 1];
8072
8073 /*
8074 * special work for namespaces defaulted defs
8075 */
8076 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8077 /*
8078 * check that it's not a defined namespace
8079 */
8080 for (j = 1;j <= nbNs;j++)
8081 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8082 break;
8083 if (j <= nbNs) continue;
8084
8085 nsname = xmlGetNamespace(ctxt, NULL);
8086 if (nsname != defaults->values[4 * i + 2]) {
8087 if (nsPush(ctxt, NULL,
8088 defaults->values[4 * i + 2]) > 0)
8089 nbNs++;
8090 }
8091 } else if (aprefix == ctxt->str_xmlns) {
8092 /*
8093 * check that it's not a defined namespace
8094 */
8095 for (j = 1;j <= nbNs;j++)
8096 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8097 break;
8098 if (j <= nbNs) continue;
8099
8100 nsname = xmlGetNamespace(ctxt, attname);
8101 if (nsname != defaults->values[2]) {
8102 if (nsPush(ctxt, attname,
8103 defaults->values[4 * i + 2]) > 0)
8104 nbNs++;
8105 }
8106 } else {
8107 /*
8108 * check that it's not a defined attribute
8109 */
8110 for (j = 0;j < nbatts;j+=5) {
8111 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8112 break;
8113 }
8114 if (j < nbatts) continue;
8115
8116 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8117 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008118 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008119 }
8120 maxatts = ctxt->maxatts;
8121 atts = ctxt->atts;
8122 }
8123 atts[nbatts++] = attname;
8124 atts[nbatts++] = aprefix;
8125 if (aprefix == NULL)
8126 atts[nbatts++] = NULL;
8127 else
8128 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8129 atts[nbatts++] = defaults->values[4 * i + 2];
8130 atts[nbatts++] = defaults->values[4 * i + 3];
8131 nbdef++;
8132 }
8133 }
8134 }
8135 }
8136
Daniel Veillarde70c8772003-11-25 07:21:18 +00008137 /*
8138 * The attributes checkings
8139 */
8140 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008141 /*
8142 * The default namespace does not apply to attribute names.
8143 */
8144 if (atts[i + 1] != NULL) {
8145 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8146 if (nsname == NULL) {
8147 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8148 "Namespace prefix %s for %s on %s is not defined\n",
8149 atts[i + 1], atts[i], localname);
8150 }
8151 atts[i + 2] = nsname;
8152 } else
8153 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008154 /*
8155 * [ WFC: Unique Att Spec ]
8156 * No attribute name may appear more than once in the same
8157 * start-tag or empty-element tag.
8158 * As extended by the Namespace in XML REC.
8159 */
8160 for (j = 0; j < i;j += 5) {
8161 if (atts[i] == atts[j]) {
8162 if (atts[i+1] == atts[j+1]) {
8163 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8164 break;
8165 }
8166 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8167 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8168 "Namespaced Attribute %s in '%s' redefined\n",
8169 atts[i], nsname, NULL);
8170 break;
8171 }
8172 }
8173 }
8174 }
8175
Daniel Veillarde57ec792003-09-10 10:50:59 +00008176 nsname = xmlGetNamespace(ctxt, prefix);
8177 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008178 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8179 "Namespace prefix %s on %s is not defined\n",
8180 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008181 }
8182 *pref = prefix;
8183 *URI = nsname;
8184
8185 /*
8186 * SAX: Start of Element !
8187 */
8188 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8189 (!ctxt->disableSAX)) {
8190 if (nbNs > 0)
8191 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8192 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8193 nbatts / 5, nbdef, atts);
8194 else
8195 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8196 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8197 }
8198
8199 /*
8200 * Free up attribute allocated strings if needed
8201 */
8202 if (attval != 0) {
8203 for (i = 3,j = 0; j < nratts;i += 5,j++)
8204 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8205 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008206 }
8207
8208 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008209
8210base_changed:
8211 /*
8212 * the attribute strings are valid iif the base didn't changed
8213 */
8214 if (attval != 0) {
8215 for (i = 3,j = 0; j < nratts;i += 5,j++)
8216 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8217 xmlFree((xmlChar *) atts[i]);
8218 }
8219 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008220 ctxt->input->line = oldline;
8221 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008222 if (ctxt->wellFormed == 1) {
8223 goto reparse;
8224 }
8225 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008226}
8227
8228/**
8229 * xmlParseEndTag2:
8230 * @ctxt: an XML parser context
8231 * @line: line of the start tag
8232 * @nsNr: number of namespaces on the start tag
8233 *
8234 * parse an end of tag
8235 *
8236 * [42] ETag ::= '</' Name S? '>'
8237 *
8238 * With namespace
8239 *
8240 * [NS 9] ETag ::= '</' QName S? '>'
8241 */
8242
8243static void
8244xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008245 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246 const xmlChar *name;
8247
8248 GROW;
8249 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008250 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008251 return;
8252 }
8253 SKIP(2);
8254
William M. Brack13dfa872004-09-18 04:52:08 +00008255 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008256 if (ctxt->input->cur[tlen] == '>') {
8257 ctxt->input->cur += tlen + 1;
8258 goto done;
8259 }
8260 ctxt->input->cur += tlen;
8261 name = (xmlChar*)1;
8262 } else {
8263 if (prefix == NULL)
8264 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8265 else
8266 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8267 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268
8269 /*
8270 * We should definitely be at the ending "S? '>'" part
8271 */
8272 GROW;
8273 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008274 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008275 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 } else
8277 NEXT1;
8278
8279 /*
8280 * [ WFC: Element Type Match ]
8281 * The Name in an element's end-tag must match the element type in the
8282 * start-tag.
8283 *
8284 */
8285 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008286 if (name == NULL) name = BAD_CAST "unparseable";
8287 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008289 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 }
8291
8292 /*
8293 * SAX: End of Tag
8294 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008295done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008296 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8297 (!ctxt->disableSAX))
8298 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8299
Daniel Veillard0fb18932003-09-07 09:14:37 +00008300 spacePop(ctxt);
8301 if (nsNr != 0)
8302 nsPop(ctxt, nsNr);
8303 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008304}
8305
8306/**
Owen Taylor3473f882001-02-23 17:55:21 +00008307 * xmlParseCDSect:
8308 * @ctxt: an XML parser context
8309 *
8310 * Parse escaped pure raw content.
8311 *
8312 * [18] CDSect ::= CDStart CData CDEnd
8313 *
8314 * [19] CDStart ::= '<![CDATA['
8315 *
8316 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8317 *
8318 * [21] CDEnd ::= ']]>'
8319 */
8320void
8321xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8322 xmlChar *buf = NULL;
8323 int len = 0;
8324 int size = XML_PARSER_BUFFER_SIZE;
8325 int r, rl;
8326 int s, sl;
8327 int cur, l;
8328 int count = 0;
8329
Daniel Veillard8f597c32003-10-06 08:19:27 +00008330 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008331 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008332 SKIP(9);
8333 } else
8334 return;
8335
8336 ctxt->instate = XML_PARSER_CDATA_SECTION;
8337 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008338 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008339 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008340 ctxt->instate = XML_PARSER_CONTENT;
8341 return;
8342 }
8343 NEXTL(rl);
8344 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008345 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008346 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008347 ctxt->instate = XML_PARSER_CONTENT;
8348 return;
8349 }
8350 NEXTL(sl);
8351 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008352 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008353 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008354 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008355 return;
8356 }
William M. Brack871611b2003-10-18 04:53:14 +00008357 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008358 ((r != ']') || (s != ']') || (cur != '>'))) {
8359 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008360 xmlChar *tmp;
8361
Owen Taylor3473f882001-02-23 17:55:21 +00008362 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008363 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8364 if (tmp == NULL) {
8365 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008366 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 return;
8368 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008369 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008370 }
8371 COPY_BUF(rl,buf,len,r);
8372 r = s;
8373 rl = sl;
8374 s = cur;
8375 sl = l;
8376 count++;
8377 if (count > 50) {
8378 GROW;
8379 count = 0;
8380 }
8381 NEXTL(l);
8382 cur = CUR_CHAR(l);
8383 }
8384 buf[len] = 0;
8385 ctxt->instate = XML_PARSER_CONTENT;
8386 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008387 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008388 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 xmlFree(buf);
8390 return;
8391 }
8392 NEXTL(l);
8393
8394 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008395 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008396 */
8397 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8398 if (ctxt->sax->cdataBlock != NULL)
8399 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008400 else if (ctxt->sax->characters != NULL)
8401 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008402 }
8403 xmlFree(buf);
8404}
8405
8406/**
8407 * xmlParseContent:
8408 * @ctxt: an XML parser context
8409 *
8410 * Parse a content:
8411 *
8412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8413 */
8414
8415void
8416xmlParseContent(xmlParserCtxtPtr ctxt) {
8417 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008418 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008419 ((RAW != '<') || (NXT(1) != '/')) &&
8420 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008421 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008422 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008423 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008424
8425 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008426 * First case : a Processing Instruction.
8427 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008428 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008429 xmlParsePI(ctxt);
8430 }
8431
8432 /*
8433 * Second case : a CDSection
8434 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008435 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008436 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008437 xmlParseCDSect(ctxt);
8438 }
8439
8440 /*
8441 * Third case : a comment
8442 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008443 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008444 (NXT(2) == '-') && (NXT(3) == '-')) {
8445 xmlParseComment(ctxt);
8446 ctxt->instate = XML_PARSER_CONTENT;
8447 }
8448
8449 /*
8450 * Fourth case : a sub-element.
8451 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008452 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008453 xmlParseElement(ctxt);
8454 }
8455
8456 /*
8457 * Fifth case : a reference. If if has not been resolved,
8458 * parsing returns it's Name, create the node
8459 */
8460
Daniel Veillard21a0f912001-02-25 19:54:14 +00008461 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008462 xmlParseReference(ctxt);
8463 }
8464
8465 /*
8466 * Last case, text. Note that References are handled directly.
8467 */
8468 else {
8469 xmlParseCharData(ctxt, 0);
8470 }
8471
8472 GROW;
8473 /*
8474 * Pop-up of finished entities.
8475 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008476 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008477 xmlPopInput(ctxt);
8478 SHRINK;
8479
Daniel Veillardfdc91562002-07-01 21:52:03 +00008480 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008481 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008483 ctxt->instate = XML_PARSER_EOF;
8484 break;
8485 }
8486 }
8487}
8488
8489/**
8490 * xmlParseElement:
8491 * @ctxt: an XML parser context
8492 *
8493 * parse an XML element, this is highly recursive
8494 *
8495 * [39] element ::= EmptyElemTag | STag content ETag
8496 *
8497 * [ WFC: Element Type Match ]
8498 * The Name in an element's end-tag must match the element type in the
8499 * start-tag.
8500 *
Owen Taylor3473f882001-02-23 17:55:21 +00008501 */
8502
8503void
8504xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008505 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008506 const xmlChar *prefix;
8507 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008508 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008509 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008511 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008512
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008513 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8514 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8515 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8516 xmlParserMaxDepth);
8517 ctxt->instate = XML_PARSER_EOF;
8518 return;
8519 }
8520
Owen Taylor3473f882001-02-23 17:55:21 +00008521 /* Capture start position */
8522 if (ctxt->record_info) {
8523 node_info.begin_pos = ctxt->input->consumed +
8524 (CUR_PTR - ctxt->input->base);
8525 node_info.begin_line = ctxt->input->line;
8526 }
8527
8528 if (ctxt->spaceNr == 0)
8529 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008530 else if (*ctxt->space == -2)
8531 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008532 else
8533 spacePush(ctxt, *ctxt->space);
8534
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008535 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008536#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008537 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008538#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008539 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008540#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 else
8542 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008543#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008544 if (name == NULL) {
8545 spacePop(ctxt);
8546 return;
8547 }
8548 namePush(ctxt, name);
8549 ret = ctxt->node;
8550
Daniel Veillard4432df22003-09-28 18:58:27 +00008551#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008552 /*
8553 * [ VC: Root Element Type ]
8554 * The Name in the document type declaration must match the element
8555 * type of the root element.
8556 */
8557 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8558 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8559 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008560#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008561
8562 /*
8563 * Check for an Empty Element.
8564 */
8565 if ((RAW == '/') && (NXT(1) == '>')) {
8566 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 if (ctxt->sax2) {
8568 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8569 (!ctxt->disableSAX))
8570 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008571#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008572 } else {
8573 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8574 (!ctxt->disableSAX))
8575 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008576#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008577 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008578 namePop(ctxt);
8579 spacePop(ctxt);
8580 if (nsNr != ctxt->nsNr)
8581 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008582 if ( ret != NULL && ctxt->record_info ) {
8583 node_info.end_pos = ctxt->input->consumed +
8584 (CUR_PTR - ctxt->input->base);
8585 node_info.end_line = ctxt->input->line;
8586 node_info.node = ret;
8587 xmlParserAddNodeInfo(ctxt, &node_info);
8588 }
8589 return;
8590 }
8591 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008592 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008593 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008594 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8595 "Couldn't find end of Start Tag %s line %d\n",
8596 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008597
8598 /*
8599 * end of parsing of this node.
8600 */
8601 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008602 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008604 if (nsNr != ctxt->nsNr)
8605 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008606
8607 /*
8608 * Capture end position and add node
8609 */
8610 if ( ret != NULL && ctxt->record_info ) {
8611 node_info.end_pos = ctxt->input->consumed +
8612 (CUR_PTR - ctxt->input->base);
8613 node_info.end_line = ctxt->input->line;
8614 node_info.node = ret;
8615 xmlParserAddNodeInfo(ctxt, &node_info);
8616 }
8617 return;
8618 }
8619
8620 /*
8621 * Parse the content of the element:
8622 */
8623 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008624 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008625 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008626 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008627 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008628
8629 /*
8630 * end of parsing of this node.
8631 */
8632 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008633 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008634 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 if (nsNr != ctxt->nsNr)
8636 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008637 return;
8638 }
8639
8640 /*
8641 * parse the end of tag: '</' should be here.
8642 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008643 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008644 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008645 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008646 }
8647#ifdef LIBXML_SAX1_ENABLED
8648 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008650#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008651
8652 /*
8653 * Capture end position and add node
8654 */
8655 if ( ret != NULL && ctxt->record_info ) {
8656 node_info.end_pos = ctxt->input->consumed +
8657 (CUR_PTR - ctxt->input->base);
8658 node_info.end_line = ctxt->input->line;
8659 node_info.node = ret;
8660 xmlParserAddNodeInfo(ctxt, &node_info);
8661 }
8662}
8663
8664/**
8665 * xmlParseVersionNum:
8666 * @ctxt: an XML parser context
8667 *
8668 * parse the XML version value.
8669 *
8670 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8671 *
8672 * Returns the string giving the XML version number, or NULL
8673 */
8674xmlChar *
8675xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8676 xmlChar *buf = NULL;
8677 int len = 0;
8678 int size = 10;
8679 xmlChar cur;
8680
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008681 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008682 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008683 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008684 return(NULL);
8685 }
8686 cur = CUR;
8687 while (((cur >= 'a') && (cur <= 'z')) ||
8688 ((cur >= 'A') && (cur <= 'Z')) ||
8689 ((cur >= '0') && (cur <= '9')) ||
8690 (cur == '_') || (cur == '.') ||
8691 (cur == ':') || (cur == '-')) {
8692 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008693 xmlChar *tmp;
8694
Owen Taylor3473f882001-02-23 17:55:21 +00008695 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008696 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8697 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008699 return(NULL);
8700 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008701 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008702 }
8703 buf[len++] = cur;
8704 NEXT;
8705 cur=CUR;
8706 }
8707 buf[len] = 0;
8708 return(buf);
8709}
8710
8711/**
8712 * xmlParseVersionInfo:
8713 * @ctxt: an XML parser context
8714 *
8715 * parse the XML version.
8716 *
8717 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8718 *
8719 * [25] Eq ::= S? '=' S?
8720 *
8721 * Returns the version string, e.g. "1.0"
8722 */
8723
8724xmlChar *
8725xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8726 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008727
Daniel Veillarda07050d2003-10-19 14:46:32 +00008728 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008729 SKIP(7);
8730 SKIP_BLANKS;
8731 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008732 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008733 return(NULL);
8734 }
8735 NEXT;
8736 SKIP_BLANKS;
8737 if (RAW == '"') {
8738 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008739 version = xmlParseVersionNum(ctxt);
8740 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008741 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008742 } else
8743 NEXT;
8744 } else if (RAW == '\''){
8745 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008746 version = xmlParseVersionNum(ctxt);
8747 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008748 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008749 } else
8750 NEXT;
8751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008752 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008753 }
8754 }
8755 return(version);
8756}
8757
8758/**
8759 * xmlParseEncName:
8760 * @ctxt: an XML parser context
8761 *
8762 * parse the XML encoding name
8763 *
8764 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8765 *
8766 * Returns the encoding name value or NULL
8767 */
8768xmlChar *
8769xmlParseEncName(xmlParserCtxtPtr ctxt) {
8770 xmlChar *buf = NULL;
8771 int len = 0;
8772 int size = 10;
8773 xmlChar cur;
8774
8775 cur = CUR;
8776 if (((cur >= 'a') && (cur <= 'z')) ||
8777 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008778 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008779 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008780 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008781 return(NULL);
8782 }
8783
8784 buf[len++] = cur;
8785 NEXT;
8786 cur = CUR;
8787 while (((cur >= 'a') && (cur <= 'z')) ||
8788 ((cur >= 'A') && (cur <= 'Z')) ||
8789 ((cur >= '0') && (cur <= '9')) ||
8790 (cur == '.') || (cur == '_') ||
8791 (cur == '-')) {
8792 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008793 xmlChar *tmp;
8794
Owen Taylor3473f882001-02-23 17:55:21 +00008795 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008796 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8797 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008798 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008799 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008800 return(NULL);
8801 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008802 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008803 }
8804 buf[len++] = cur;
8805 NEXT;
8806 cur = CUR;
8807 if (cur == 0) {
8808 SHRINK;
8809 GROW;
8810 cur = CUR;
8811 }
8812 }
8813 buf[len] = 0;
8814 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008815 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008816 }
8817 return(buf);
8818}
8819
8820/**
8821 * xmlParseEncodingDecl:
8822 * @ctxt: an XML parser context
8823 *
8824 * parse the XML encoding declaration
8825 *
8826 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8827 *
8828 * this setups the conversion filters.
8829 *
8830 * Returns the encoding value or NULL
8831 */
8832
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008833const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008834xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8835 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008836
8837 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008838 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008839 SKIP(8);
8840 SKIP_BLANKS;
8841 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008842 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008843 return(NULL);
8844 }
8845 NEXT;
8846 SKIP_BLANKS;
8847 if (RAW == '"') {
8848 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008849 encoding = xmlParseEncName(ctxt);
8850 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008851 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008852 } else
8853 NEXT;
8854 } else if (RAW == '\''){
8855 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008856 encoding = xmlParseEncName(ctxt);
8857 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008858 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008859 } else
8860 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008861 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008862 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008863 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008864 /*
8865 * UTF-16 encoding stwich has already taken place at this stage,
8866 * more over the little-endian/big-endian selection is already done
8867 */
8868 if ((encoding != NULL) &&
8869 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8870 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008871 if (ctxt->encoding != NULL)
8872 xmlFree((xmlChar *) ctxt->encoding);
8873 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008874 }
8875 /*
8876 * UTF-8 encoding is handled natively
8877 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008878 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008879 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8880 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008881 if (ctxt->encoding != NULL)
8882 xmlFree((xmlChar *) ctxt->encoding);
8883 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008884 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008885 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008886 xmlCharEncodingHandlerPtr handler;
8887
8888 if (ctxt->input->encoding != NULL)
8889 xmlFree((xmlChar *) ctxt->input->encoding);
8890 ctxt->input->encoding = encoding;
8891
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008892 handler = xmlFindCharEncodingHandler((const char *) encoding);
8893 if (handler != NULL) {
8894 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008895 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008897 "Unsupported encoding %s\n", encoding);
8898 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008899 }
8900 }
8901 }
8902 return(encoding);
8903}
8904
8905/**
8906 * xmlParseSDDecl:
8907 * @ctxt: an XML parser context
8908 *
8909 * parse the XML standalone declaration
8910 *
8911 * [32] SDDecl ::= S 'standalone' Eq
8912 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8913 *
8914 * [ VC: Standalone Document Declaration ]
8915 * TODO The standalone document declaration must have the value "no"
8916 * if any external markup declarations contain declarations of:
8917 * - attributes with default values, if elements to which these
8918 * attributes apply appear in the document without specifications
8919 * of values for these attributes, or
8920 * - entities (other than amp, lt, gt, apos, quot), if references
8921 * to those entities appear in the document, or
8922 * - attributes with values subject to normalization, where the
8923 * attribute appears in the document with a value which will change
8924 * as a result of normalization, or
8925 * - element types with element content, if white space occurs directly
8926 * within any instance of those types.
8927 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008928 * Returns:
8929 * 1 if standalone="yes"
8930 * 0 if standalone="no"
8931 * -2 if standalone attribute is missing or invalid
8932 * (A standalone value of -2 means that the XML declaration was found,
8933 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00008934 */
8935
8936int
8937xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008938 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00008939
8940 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008941 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008942 SKIP(10);
8943 SKIP_BLANKS;
8944 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008945 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008946 return(standalone);
8947 }
8948 NEXT;
8949 SKIP_BLANKS;
8950 if (RAW == '\''){
8951 NEXT;
8952 if ((RAW == 'n') && (NXT(1) == 'o')) {
8953 standalone = 0;
8954 SKIP(2);
8955 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8956 (NXT(2) == 's')) {
8957 standalone = 1;
8958 SKIP(3);
8959 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008960 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008961 }
8962 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008963 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008964 } else
8965 NEXT;
8966 } else if (RAW == '"'){
8967 NEXT;
8968 if ((RAW == 'n') && (NXT(1) == 'o')) {
8969 standalone = 0;
8970 SKIP(2);
8971 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8972 (NXT(2) == 's')) {
8973 standalone = 1;
8974 SKIP(3);
8975 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008976 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008977 }
8978 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008979 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008980 } else
8981 NEXT;
8982 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008983 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008984 }
8985 }
8986 return(standalone);
8987}
8988
8989/**
8990 * xmlParseXMLDecl:
8991 * @ctxt: an XML parser context
8992 *
8993 * parse an XML declaration header
8994 *
8995 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8996 */
8997
8998void
8999xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9000 xmlChar *version;
9001
9002 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009003 * This value for standalone indicates that the document has an
9004 * XML declaration but it does not have a standalone attribute.
9005 * It will be overwritten later if a standalone attribute is found.
9006 */
9007 ctxt->input->standalone = -2;
9008
9009 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009010 * We know that '<?xml' is here.
9011 */
9012 SKIP(5);
9013
William M. Brack76e95df2003-10-18 16:20:14 +00009014 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009015 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9016 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009017 }
9018 SKIP_BLANKS;
9019
9020 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009021 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009022 */
9023 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009024 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009025 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009026 } else {
9027 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9028 /*
9029 * TODO: Blueberry should be detected here
9030 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009031 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9032 "Unsupported version '%s'\n",
9033 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009034 }
9035 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009036 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009037 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009038 }
Owen Taylor3473f882001-02-23 17:55:21 +00009039
9040 /*
9041 * We may have the encoding declaration
9042 */
William M. Brack76e95df2003-10-18 16:20:14 +00009043 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009044 if ((RAW == '?') && (NXT(1) == '>')) {
9045 SKIP(2);
9046 return;
9047 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009048 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009049 }
9050 xmlParseEncodingDecl(ctxt);
9051 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9052 /*
9053 * The XML REC instructs us to stop parsing right here
9054 */
9055 return;
9056 }
9057
9058 /*
9059 * We may have the standalone status.
9060 */
William M. Brack76e95df2003-10-18 16:20:14 +00009061 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009062 if ((RAW == '?') && (NXT(1) == '>')) {
9063 SKIP(2);
9064 return;
9065 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009066 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009067 }
9068 SKIP_BLANKS;
9069 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9070
9071 SKIP_BLANKS;
9072 if ((RAW == '?') && (NXT(1) == '>')) {
9073 SKIP(2);
9074 } else if (RAW == '>') {
9075 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009076 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009077 NEXT;
9078 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009079 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009080 MOVETO_ENDTAG(CUR_PTR);
9081 NEXT;
9082 }
9083}
9084
9085/**
9086 * xmlParseMisc:
9087 * @ctxt: an XML parser context
9088 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009089 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009090 *
9091 * [27] Misc ::= Comment | PI | S
9092 */
9093
9094void
9095xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009096 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009097 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009098 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009099 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009100 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009101 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009102 NEXT;
9103 } else
9104 xmlParseComment(ctxt);
9105 }
9106}
9107
9108/**
9109 * xmlParseDocument:
9110 * @ctxt: an XML parser context
9111 *
9112 * parse an XML document (and build a tree if using the standard SAX
9113 * interface).
9114 *
9115 * [1] document ::= prolog element Misc*
9116 *
9117 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9118 *
9119 * Returns 0, -1 in case of error. the parser context is augmented
9120 * as a result of the parsing.
9121 */
9122
9123int
9124xmlParseDocument(xmlParserCtxtPtr ctxt) {
9125 xmlChar start[4];
9126 xmlCharEncoding enc;
9127
9128 xmlInitParser();
9129
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009130 if ((ctxt == NULL) || (ctxt->input == NULL))
9131 return(-1);
9132
Owen Taylor3473f882001-02-23 17:55:21 +00009133 GROW;
9134
9135 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136 * SAX: detecting the level.
9137 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009138 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009139
9140 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009141 * SAX: beginning of the document processing.
9142 */
9143 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9144 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9145
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009146 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9147 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009148 /*
9149 * Get the 4 first bytes and decode the charset
9150 * if enc != XML_CHAR_ENCODING_NONE
9151 * plug some encoding conversion routines.
9152 */
9153 start[0] = RAW;
9154 start[1] = NXT(1);
9155 start[2] = NXT(2);
9156 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009157 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009158 if (enc != XML_CHAR_ENCODING_NONE) {
9159 xmlSwitchEncoding(ctxt, enc);
9160 }
Owen Taylor3473f882001-02-23 17:55:21 +00009161 }
9162
9163
9164 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009165 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009166 }
9167
9168 /*
9169 * Check for the XMLDecl in the Prolog.
9170 */
9171 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009172 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009173
9174 /*
9175 * Note that we will switch encoding on the fly.
9176 */
9177 xmlParseXMLDecl(ctxt);
9178 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9179 /*
9180 * The XML REC instructs us to stop parsing right here
9181 */
9182 return(-1);
9183 }
9184 ctxt->standalone = ctxt->input->standalone;
9185 SKIP_BLANKS;
9186 } else {
9187 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9188 }
9189 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9190 ctxt->sax->startDocument(ctxt->userData);
9191
9192 /*
9193 * The Misc part of the Prolog
9194 */
9195 GROW;
9196 xmlParseMisc(ctxt);
9197
9198 /*
9199 * Then possibly doc type declaration(s) and more Misc
9200 * (doctypedecl Misc*)?
9201 */
9202 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009203 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009204
9205 ctxt->inSubset = 1;
9206 xmlParseDocTypeDecl(ctxt);
9207 if (RAW == '[') {
9208 ctxt->instate = XML_PARSER_DTD;
9209 xmlParseInternalSubset(ctxt);
9210 }
9211
9212 /*
9213 * Create and update the external subset.
9214 */
9215 ctxt->inSubset = 2;
9216 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9217 (!ctxt->disableSAX))
9218 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9219 ctxt->extSubSystem, ctxt->extSubURI);
9220 ctxt->inSubset = 0;
9221
9222
9223 ctxt->instate = XML_PARSER_PROLOG;
9224 xmlParseMisc(ctxt);
9225 }
9226
9227 /*
9228 * Time to start parsing the tree itself
9229 */
9230 GROW;
9231 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009232 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9233 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009234 } else {
9235 ctxt->instate = XML_PARSER_CONTENT;
9236 xmlParseElement(ctxt);
9237 ctxt->instate = XML_PARSER_EPILOG;
9238
9239
9240 /*
9241 * The Misc part at the end
9242 */
9243 xmlParseMisc(ctxt);
9244
Daniel Veillard561b7f82002-03-20 21:55:57 +00009245 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009246 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009247 }
9248 ctxt->instate = XML_PARSER_EOF;
9249 }
9250
9251 /*
9252 * SAX: end of the document processing.
9253 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009254 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009255 ctxt->sax->endDocument(ctxt->userData);
9256
Daniel Veillard5997aca2002-03-18 18:36:20 +00009257 /*
9258 * Remove locally kept entity definitions if the tree was not built
9259 */
9260 if ((ctxt->myDoc != NULL) &&
9261 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9262 xmlFreeDoc(ctxt->myDoc);
9263 ctxt->myDoc = NULL;
9264 }
9265
Daniel Veillardc7612992002-02-17 22:47:37 +00009266 if (! ctxt->wellFormed) {
9267 ctxt->valid = 0;
9268 return(-1);
9269 }
Owen Taylor3473f882001-02-23 17:55:21 +00009270 return(0);
9271}
9272
9273/**
9274 * xmlParseExtParsedEnt:
9275 * @ctxt: an XML parser context
9276 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009277 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009278 * An external general parsed entity is well-formed if it matches the
9279 * production labeled extParsedEnt.
9280 *
9281 * [78] extParsedEnt ::= TextDecl? content
9282 *
9283 * Returns 0, -1 in case of error. the parser context is augmented
9284 * as a result of the parsing.
9285 */
9286
9287int
9288xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9289 xmlChar start[4];
9290 xmlCharEncoding enc;
9291
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009292 if ((ctxt == NULL) || (ctxt->input == NULL))
9293 return(-1);
9294
Owen Taylor3473f882001-02-23 17:55:21 +00009295 xmlDefaultSAXHandlerInit();
9296
Daniel Veillard309f81d2003-09-23 09:02:53 +00009297 xmlDetectSAX2(ctxt);
9298
Owen Taylor3473f882001-02-23 17:55:21 +00009299 GROW;
9300
9301 /*
9302 * SAX: beginning of the document processing.
9303 */
9304 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9305 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9306
9307 /*
9308 * Get the 4 first bytes and decode the charset
9309 * if enc != XML_CHAR_ENCODING_NONE
9310 * plug some encoding conversion routines.
9311 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009312 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9313 start[0] = RAW;
9314 start[1] = NXT(1);
9315 start[2] = NXT(2);
9316 start[3] = NXT(3);
9317 enc = xmlDetectCharEncoding(start, 4);
9318 if (enc != XML_CHAR_ENCODING_NONE) {
9319 xmlSwitchEncoding(ctxt, enc);
9320 }
Owen Taylor3473f882001-02-23 17:55:21 +00009321 }
9322
9323
9324 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009326 }
9327
9328 /*
9329 * Check for the XMLDecl in the Prolog.
9330 */
9331 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009332 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009333
9334 /*
9335 * Note that we will switch encoding on the fly.
9336 */
9337 xmlParseXMLDecl(ctxt);
9338 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9339 /*
9340 * The XML REC instructs us to stop parsing right here
9341 */
9342 return(-1);
9343 }
9344 SKIP_BLANKS;
9345 } else {
9346 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9347 }
9348 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9349 ctxt->sax->startDocument(ctxt->userData);
9350
9351 /*
9352 * Doing validity checking on chunk doesn't make sense
9353 */
9354 ctxt->instate = XML_PARSER_CONTENT;
9355 ctxt->validate = 0;
9356 ctxt->loadsubset = 0;
9357 ctxt->depth = 0;
9358
9359 xmlParseContent(ctxt);
9360
9361 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009362 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009363 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009364 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009365 }
9366
9367 /*
9368 * SAX: end of the document processing.
9369 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009370 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009371 ctxt->sax->endDocument(ctxt->userData);
9372
9373 if (! ctxt->wellFormed) return(-1);
9374 return(0);
9375}
9376
Daniel Veillard73b013f2003-09-30 12:36:01 +00009377#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009378/************************************************************************
9379 * *
9380 * Progressive parsing interfaces *
9381 * *
9382 ************************************************************************/
9383
9384/**
9385 * xmlParseLookupSequence:
9386 * @ctxt: an XML parser context
9387 * @first: the first char to lookup
9388 * @next: the next char to lookup or zero
9389 * @third: the next char to lookup or zero
9390 *
9391 * Try to find if a sequence (first, next, third) or just (first next) or
9392 * (first) is available in the input stream.
9393 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9394 * to avoid rescanning sequences of bytes, it DOES change the state of the
9395 * parser, do not use liberally.
9396 *
9397 * Returns the index to the current parsing point if the full sequence
9398 * is available, -1 otherwise.
9399 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009400static int
Owen Taylor3473f882001-02-23 17:55:21 +00009401xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9402 xmlChar next, xmlChar third) {
9403 int base, len;
9404 xmlParserInputPtr in;
9405 const xmlChar *buf;
9406
9407 in = ctxt->input;
9408 if (in == NULL) return(-1);
9409 base = in->cur - in->base;
9410 if (base < 0) return(-1);
9411 if (ctxt->checkIndex > base)
9412 base = ctxt->checkIndex;
9413 if (in->buf == NULL) {
9414 buf = in->base;
9415 len = in->length;
9416 } else {
9417 buf = in->buf->buffer->content;
9418 len = in->buf->buffer->use;
9419 }
9420 /* take into account the sequence length */
9421 if (third) len -= 2;
9422 else if (next) len --;
9423 for (;base < len;base++) {
9424 if (buf[base] == first) {
9425 if (third != 0) {
9426 if ((buf[base + 1] != next) ||
9427 (buf[base + 2] != third)) continue;
9428 } else if (next != 0) {
9429 if (buf[base + 1] != next) continue;
9430 }
9431 ctxt->checkIndex = 0;
9432#ifdef DEBUG_PUSH
9433 if (next == 0)
9434 xmlGenericError(xmlGenericErrorContext,
9435 "PP: lookup '%c' found at %d\n",
9436 first, base);
9437 else if (third == 0)
9438 xmlGenericError(xmlGenericErrorContext,
9439 "PP: lookup '%c%c' found at %d\n",
9440 first, next, base);
9441 else
9442 xmlGenericError(xmlGenericErrorContext,
9443 "PP: lookup '%c%c%c' found at %d\n",
9444 first, next, third, base);
9445#endif
9446 return(base - (in->cur - in->base));
9447 }
9448 }
9449 ctxt->checkIndex = base;
9450#ifdef DEBUG_PUSH
9451 if (next == 0)
9452 xmlGenericError(xmlGenericErrorContext,
9453 "PP: lookup '%c' failed\n", first);
9454 else if (third == 0)
9455 xmlGenericError(xmlGenericErrorContext,
9456 "PP: lookup '%c%c' failed\n", first, next);
9457 else
9458 xmlGenericError(xmlGenericErrorContext,
9459 "PP: lookup '%c%c%c' failed\n", first, next, third);
9460#endif
9461 return(-1);
9462}
9463
9464/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009465 * xmlParseGetLasts:
9466 * @ctxt: an XML parser context
9467 * @lastlt: pointer to store the last '<' from the input
9468 * @lastgt: pointer to store the last '>' from the input
9469 *
9470 * Lookup the last < and > in the current chunk
9471 */
9472static void
9473xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9474 const xmlChar **lastgt) {
9475 const xmlChar *tmp;
9476
9477 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9478 xmlGenericError(xmlGenericErrorContext,
9479 "Internal error: xmlParseGetLasts\n");
9480 return;
9481 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009482 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009483 tmp = ctxt->input->end;
9484 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009485 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009486 if (tmp < ctxt->input->base) {
9487 *lastlt = NULL;
9488 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009489 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009490 *lastlt = tmp;
9491 tmp++;
9492 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9493 if (*tmp == '\'') {
9494 tmp++;
9495 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9496 if (tmp < ctxt->input->end) tmp++;
9497 } else if (*tmp == '"') {
9498 tmp++;
9499 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9500 if (tmp < ctxt->input->end) tmp++;
9501 } else
9502 tmp++;
9503 }
9504 if (tmp < ctxt->input->end)
9505 *lastgt = tmp;
9506 else {
9507 tmp = *lastlt;
9508 tmp--;
9509 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9510 if (tmp >= ctxt->input->base)
9511 *lastgt = tmp;
9512 else
9513 *lastgt = NULL;
9514 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009515 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009516 } else {
9517 *lastlt = NULL;
9518 *lastgt = NULL;
9519 }
9520}
9521/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009522 * xmlCheckCdataPush:
9523 * @cur: pointer to the bock of characters
9524 * @len: length of the block in bytes
9525 *
9526 * Check that the block of characters is okay as SCdata content [20]
9527 *
9528 * Returns the number of bytes to pass if okay, a negative index where an
9529 * UTF-8 error occured otherwise
9530 */
9531static int
9532xmlCheckCdataPush(const xmlChar *utf, int len) {
9533 int ix;
9534 unsigned char c;
9535 int codepoint;
9536
9537 if ((utf == NULL) || (len <= 0))
9538 return(0);
9539
9540 for (ix = 0; ix < len;) { /* string is 0-terminated */
9541 c = utf[ix];
9542 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9543 if (c >= 0x20)
9544 ix++;
9545 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9546 ix++;
9547 else
9548 return(-ix);
9549 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9550 if (ix + 2 > len) return(ix);
9551 if ((utf[ix+1] & 0xc0 ) != 0x80)
9552 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009553 codepoint = (utf[ix] & 0x1f) << 6;
9554 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009555 if (!xmlIsCharQ(codepoint))
9556 return(-ix);
9557 ix += 2;
9558 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9559 if (ix + 3 > len) return(ix);
9560 if (((utf[ix+1] & 0xc0) != 0x80) ||
9561 ((utf[ix+2] & 0xc0) != 0x80))
9562 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009563 codepoint = (utf[ix] & 0xf) << 12;
9564 codepoint |= (utf[ix+1] & 0x3f) << 6;
9565 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009566 if (!xmlIsCharQ(codepoint))
9567 return(-ix);
9568 ix += 3;
9569 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9570 if (ix + 4 > len) return(ix);
9571 if (((utf[ix+1] & 0xc0) != 0x80) ||
9572 ((utf[ix+2] & 0xc0) != 0x80) ||
9573 ((utf[ix+3] & 0xc0) != 0x80))
9574 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009575 codepoint = (utf[ix] & 0x7) << 18;
9576 codepoint |= (utf[ix+1] & 0x3f) << 12;
9577 codepoint |= (utf[ix+2] & 0x3f) << 6;
9578 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009579 if (!xmlIsCharQ(codepoint))
9580 return(-ix);
9581 ix += 4;
9582 } else /* unknown encoding */
9583 return(-ix);
9584 }
9585 return(ix);
9586}
9587
9588/**
Owen Taylor3473f882001-02-23 17:55:21 +00009589 * xmlParseTryOrFinish:
9590 * @ctxt: an XML parser context
9591 * @terminate: last chunk indicator
9592 *
9593 * Try to progress on parsing
9594 *
9595 * Returns zero if no parsing was possible
9596 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009597static int
Owen Taylor3473f882001-02-23 17:55:21 +00009598xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9599 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009600 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009601 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009602 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009603
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009604 if (ctxt->input == NULL)
9605 return(0);
9606
Owen Taylor3473f882001-02-23 17:55:21 +00009607#ifdef DEBUG_PUSH
9608 switch (ctxt->instate) {
9609 case XML_PARSER_EOF:
9610 xmlGenericError(xmlGenericErrorContext,
9611 "PP: try EOF\n"); break;
9612 case XML_PARSER_START:
9613 xmlGenericError(xmlGenericErrorContext,
9614 "PP: try START\n"); break;
9615 case XML_PARSER_MISC:
9616 xmlGenericError(xmlGenericErrorContext,
9617 "PP: try MISC\n");break;
9618 case XML_PARSER_COMMENT:
9619 xmlGenericError(xmlGenericErrorContext,
9620 "PP: try COMMENT\n");break;
9621 case XML_PARSER_PROLOG:
9622 xmlGenericError(xmlGenericErrorContext,
9623 "PP: try PROLOG\n");break;
9624 case XML_PARSER_START_TAG:
9625 xmlGenericError(xmlGenericErrorContext,
9626 "PP: try START_TAG\n");break;
9627 case XML_PARSER_CONTENT:
9628 xmlGenericError(xmlGenericErrorContext,
9629 "PP: try CONTENT\n");break;
9630 case XML_PARSER_CDATA_SECTION:
9631 xmlGenericError(xmlGenericErrorContext,
9632 "PP: try CDATA_SECTION\n");break;
9633 case XML_PARSER_END_TAG:
9634 xmlGenericError(xmlGenericErrorContext,
9635 "PP: try END_TAG\n");break;
9636 case XML_PARSER_ENTITY_DECL:
9637 xmlGenericError(xmlGenericErrorContext,
9638 "PP: try ENTITY_DECL\n");break;
9639 case XML_PARSER_ENTITY_VALUE:
9640 xmlGenericError(xmlGenericErrorContext,
9641 "PP: try ENTITY_VALUE\n");break;
9642 case XML_PARSER_ATTRIBUTE_VALUE:
9643 xmlGenericError(xmlGenericErrorContext,
9644 "PP: try ATTRIBUTE_VALUE\n");break;
9645 case XML_PARSER_DTD:
9646 xmlGenericError(xmlGenericErrorContext,
9647 "PP: try DTD\n");break;
9648 case XML_PARSER_EPILOG:
9649 xmlGenericError(xmlGenericErrorContext,
9650 "PP: try EPILOG\n");break;
9651 case XML_PARSER_PI:
9652 xmlGenericError(xmlGenericErrorContext,
9653 "PP: try PI\n");break;
9654 case XML_PARSER_IGNORE:
9655 xmlGenericError(xmlGenericErrorContext,
9656 "PP: try IGNORE\n");break;
9657 }
9658#endif
9659
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009660 if ((ctxt->input != NULL) &&
9661 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009662 xmlSHRINK(ctxt);
9663 ctxt->checkIndex = 0;
9664 }
9665 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009666
Daniel Veillarda880b122003-04-21 21:36:41 +00009667 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009668 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009669 return(0);
9670
9671
Owen Taylor3473f882001-02-23 17:55:21 +00009672 /*
9673 * Pop-up of finished entities.
9674 */
9675 while ((RAW == 0) && (ctxt->inputNr > 1))
9676 xmlPopInput(ctxt);
9677
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009678 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009679 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009680 avail = ctxt->input->length -
9681 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009682 else {
9683 /*
9684 * If we are operating on converted input, try to flush
9685 * remainng chars to avoid them stalling in the non-converted
9686 * buffer.
9687 */
9688 if ((ctxt->input->buf->raw != NULL) &&
9689 (ctxt->input->buf->raw->use > 0)) {
9690 int base = ctxt->input->base -
9691 ctxt->input->buf->buffer->content;
9692 int current = ctxt->input->cur - ctxt->input->base;
9693
9694 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9695 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9696 ctxt->input->cur = ctxt->input->base + current;
9697 ctxt->input->end =
9698 &ctxt->input->buf->buffer->content[
9699 ctxt->input->buf->buffer->use];
9700 }
9701 avail = ctxt->input->buf->buffer->use -
9702 (ctxt->input->cur - ctxt->input->base);
9703 }
Owen Taylor3473f882001-02-23 17:55:21 +00009704 if (avail < 1)
9705 goto done;
9706 switch (ctxt->instate) {
9707 case XML_PARSER_EOF:
9708 /*
9709 * Document parsing is done !
9710 */
9711 goto done;
9712 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009713 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9714 xmlChar start[4];
9715 xmlCharEncoding enc;
9716
9717 /*
9718 * Very first chars read from the document flow.
9719 */
9720 if (avail < 4)
9721 goto done;
9722
9723 /*
9724 * Get the 4 first bytes and decode the charset
9725 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009726 * plug some encoding conversion routines,
9727 * else xmlSwitchEncoding will set to (default)
9728 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009729 */
9730 start[0] = RAW;
9731 start[1] = NXT(1);
9732 start[2] = NXT(2);
9733 start[3] = NXT(3);
9734 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009735 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009736 break;
9737 }
Owen Taylor3473f882001-02-23 17:55:21 +00009738
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009739 if (avail < 2)
9740 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009741 cur = ctxt->input->cur[0];
9742 next = ctxt->input->cur[1];
9743 if (cur == 0) {
9744 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9745 ctxt->sax->setDocumentLocator(ctxt->userData,
9746 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009747 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009748 ctxt->instate = XML_PARSER_EOF;
9749#ifdef DEBUG_PUSH
9750 xmlGenericError(xmlGenericErrorContext,
9751 "PP: entering EOF\n");
9752#endif
9753 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9754 ctxt->sax->endDocument(ctxt->userData);
9755 goto done;
9756 }
9757 if ((cur == '<') && (next == '?')) {
9758 /* PI or XML decl */
9759 if (avail < 5) return(ret);
9760 if ((!terminate) &&
9761 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9762 return(ret);
9763 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9764 ctxt->sax->setDocumentLocator(ctxt->userData,
9765 &xmlDefaultSAXLocator);
9766 if ((ctxt->input->cur[2] == 'x') &&
9767 (ctxt->input->cur[3] == 'm') &&
9768 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009769 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009770 ret += 5;
9771#ifdef DEBUG_PUSH
9772 xmlGenericError(xmlGenericErrorContext,
9773 "PP: Parsing XML Decl\n");
9774#endif
9775 xmlParseXMLDecl(ctxt);
9776 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9777 /*
9778 * The XML REC instructs us to stop parsing right
9779 * here
9780 */
9781 ctxt->instate = XML_PARSER_EOF;
9782 return(0);
9783 }
9784 ctxt->standalone = ctxt->input->standalone;
9785 if ((ctxt->encoding == NULL) &&
9786 (ctxt->input->encoding != NULL))
9787 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9788 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9789 (!ctxt->disableSAX))
9790 ctxt->sax->startDocument(ctxt->userData);
9791 ctxt->instate = XML_PARSER_MISC;
9792#ifdef DEBUG_PUSH
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: entering MISC\n");
9795#endif
9796 } else {
9797 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9798 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9799 (!ctxt->disableSAX))
9800 ctxt->sax->startDocument(ctxt->userData);
9801 ctxt->instate = XML_PARSER_MISC;
9802#ifdef DEBUG_PUSH
9803 xmlGenericError(xmlGenericErrorContext,
9804 "PP: entering MISC\n");
9805#endif
9806 }
9807 } else {
9808 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9809 ctxt->sax->setDocumentLocator(ctxt->userData,
9810 &xmlDefaultSAXLocator);
9811 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009812 if (ctxt->version == NULL) {
9813 xmlErrMemory(ctxt, NULL);
9814 break;
9815 }
Owen Taylor3473f882001-02-23 17:55:21 +00009816 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9817 (!ctxt->disableSAX))
9818 ctxt->sax->startDocument(ctxt->userData);
9819 ctxt->instate = XML_PARSER_MISC;
9820#ifdef DEBUG_PUSH
9821 xmlGenericError(xmlGenericErrorContext,
9822 "PP: entering MISC\n");
9823#endif
9824 }
9825 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009826 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009827 const xmlChar *name;
9828 const xmlChar *prefix;
9829 const xmlChar *URI;
9830 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009831
9832 if ((avail < 2) && (ctxt->inputNr == 1))
9833 goto done;
9834 cur = ctxt->input->cur[0];
9835 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009836 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009837 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9839 ctxt->sax->endDocument(ctxt->userData);
9840 goto done;
9841 }
9842 if (!terminate) {
9843 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009844 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009845 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009846 goto done;
9847 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9848 goto done;
9849 }
9850 }
9851 if (ctxt->spaceNr == 0)
9852 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009853 else if (*ctxt->space == -2)
9854 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009855 else
9856 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009857#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009858 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009859#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009860 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009861#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009862 else
9863 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009864#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009865 if (name == NULL) {
9866 spacePop(ctxt);
9867 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009868 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9869 ctxt->sax->endDocument(ctxt->userData);
9870 goto done;
9871 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009872#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009873 /*
9874 * [ VC: Root Element Type ]
9875 * The Name in the document type declaration must match
9876 * the element type of the root element.
9877 */
9878 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9879 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9880 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009881#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009882
9883 /*
9884 * Check for an Empty Element.
9885 */
9886 if ((RAW == '/') && (NXT(1) == '>')) {
9887 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009888
9889 if (ctxt->sax2) {
9890 if ((ctxt->sax != NULL) &&
9891 (ctxt->sax->endElementNs != NULL) &&
9892 (!ctxt->disableSAX))
9893 ctxt->sax->endElementNs(ctxt->userData, name,
9894 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009895 if (ctxt->nsNr - nsNr > 0)
9896 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009897#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009898 } else {
9899 if ((ctxt->sax != NULL) &&
9900 (ctxt->sax->endElement != NULL) &&
9901 (!ctxt->disableSAX))
9902 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009903#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009905 spacePop(ctxt);
9906 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009907 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009908 } else {
9909 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009910 }
9911 break;
9912 }
9913 if (RAW == '>') {
9914 NEXT;
9915 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009916 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009917 "Couldn't find end of Start Tag %s\n",
9918 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009919 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009920 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009921 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009922 if (ctxt->sax2)
9923 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009924#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009925 else
9926 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009927#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009928
Daniel Veillarda880b122003-04-21 21:36:41 +00009929 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009930 break;
9931 }
9932 case XML_PARSER_CONTENT: {
9933 const xmlChar *test;
9934 unsigned int cons;
9935 if ((avail < 2) && (ctxt->inputNr == 1))
9936 goto done;
9937 cur = ctxt->input->cur[0];
9938 next = ctxt->input->cur[1];
9939
9940 test = CUR_PTR;
9941 cons = ctxt->input->consumed;
9942 if ((cur == '<') && (next == '/')) {
9943 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009944 break;
9945 } else if ((cur == '<') && (next == '?')) {
9946 if ((!terminate) &&
9947 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9948 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009949 xmlParsePI(ctxt);
9950 } else if ((cur == '<') && (next != '!')) {
9951 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009952 break;
9953 } else if ((cur == '<') && (next == '!') &&
9954 (ctxt->input->cur[2] == '-') &&
9955 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009956 int term;
9957
9958 if (avail < 4)
9959 goto done;
9960 ctxt->input->cur += 4;
9961 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9962 ctxt->input->cur -= 4;
9963 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009964 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009965 xmlParseComment(ctxt);
9966 ctxt->instate = XML_PARSER_CONTENT;
9967 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9968 (ctxt->input->cur[2] == '[') &&
9969 (ctxt->input->cur[3] == 'C') &&
9970 (ctxt->input->cur[4] == 'D') &&
9971 (ctxt->input->cur[5] == 'A') &&
9972 (ctxt->input->cur[6] == 'T') &&
9973 (ctxt->input->cur[7] == 'A') &&
9974 (ctxt->input->cur[8] == '[')) {
9975 SKIP(9);
9976 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009977 break;
9978 } else if ((cur == '<') && (next == '!') &&
9979 (avail < 9)) {
9980 goto done;
9981 } else if (cur == '&') {
9982 if ((!terminate) &&
9983 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9984 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009985 xmlParseReference(ctxt);
9986 } else {
9987 /* TODO Avoid the extra copy, handle directly !!! */
9988 /*
9989 * Goal of the following test is:
9990 * - minimize calls to the SAX 'character' callback
9991 * when they are mergeable
9992 * - handle an problem for isBlank when we only parse
9993 * a sequence of blank chars and the next one is
9994 * not available to check against '<' presence.
9995 * - tries to homogenize the differences in SAX
9996 * callbacks between the push and pull versions
9997 * of the parser.
9998 */
9999 if ((ctxt->inputNr == 1) &&
10000 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10001 if (!terminate) {
10002 if (ctxt->progressive) {
10003 if ((lastlt == NULL) ||
10004 (ctxt->input->cur > lastlt))
10005 goto done;
10006 } else if (xmlParseLookupSequence(ctxt,
10007 '<', 0, 0) < 0) {
10008 goto done;
10009 }
10010 }
10011 }
10012 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010013 xmlParseCharData(ctxt, 0);
10014 }
10015 /*
10016 * Pop-up of finished entities.
10017 */
10018 while ((RAW == 0) && (ctxt->inputNr > 1))
10019 xmlPopInput(ctxt);
10020 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010021 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10022 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010023 ctxt->instate = XML_PARSER_EOF;
10024 break;
10025 }
10026 break;
10027 }
10028 case XML_PARSER_END_TAG:
10029 if (avail < 2)
10030 goto done;
10031 if (!terminate) {
10032 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010033 /* > can be found unescaped in attribute values */
10034 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010035 goto done;
10036 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10037 goto done;
10038 }
10039 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010040 if (ctxt->sax2) {
10041 xmlParseEndTag2(ctxt,
10042 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10043 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010044 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010045 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010046 }
10047#ifdef LIBXML_SAX1_ENABLED
10048 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010049 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010050#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010051 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010052 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010053 } else {
10054 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010055 }
10056 break;
10057 case XML_PARSER_CDATA_SECTION: {
10058 /*
10059 * The Push mode need to have the SAX callback for
10060 * cdataBlock merge back contiguous callbacks.
10061 */
10062 int base;
10063
10064 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10065 if (base < 0) {
10066 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010067 int tmp;
10068
10069 tmp = xmlCheckCdataPush(ctxt->input->cur,
10070 XML_PARSER_BIG_BUFFER_SIZE);
10071 if (tmp < 0) {
10072 tmp = -tmp;
10073 ctxt->input->cur += tmp;
10074 goto encoding_error;
10075 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010076 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10077 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010078 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010079 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010080 else if (ctxt->sax->characters != NULL)
10081 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010082 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010083 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010084 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010085 ctxt->checkIndex = 0;
10086 }
10087 goto done;
10088 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010089 int tmp;
10090
10091 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10092 if ((tmp < 0) || (tmp != base)) {
10093 tmp = -tmp;
10094 ctxt->input->cur += tmp;
10095 goto encoding_error;
10096 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010097 if ((ctxt->sax != NULL) && (base > 0) &&
10098 (!ctxt->disableSAX)) {
10099 if (ctxt->sax->cdataBlock != NULL)
10100 ctxt->sax->cdataBlock(ctxt->userData,
10101 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010102 else if (ctxt->sax->characters != NULL)
10103 ctxt->sax->characters(ctxt->userData,
10104 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010105 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010106 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010107 ctxt->checkIndex = 0;
10108 ctxt->instate = XML_PARSER_CONTENT;
10109#ifdef DEBUG_PUSH
10110 xmlGenericError(xmlGenericErrorContext,
10111 "PP: entering CONTENT\n");
10112#endif
10113 }
10114 break;
10115 }
Owen Taylor3473f882001-02-23 17:55:21 +000010116 case XML_PARSER_MISC:
10117 SKIP_BLANKS;
10118 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010119 avail = ctxt->input->length -
10120 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010121 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010122 avail = ctxt->input->buf->buffer->use -
10123 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010124 if (avail < 2)
10125 goto done;
10126 cur = ctxt->input->cur[0];
10127 next = ctxt->input->cur[1];
10128 if ((cur == '<') && (next == '?')) {
10129 if ((!terminate) &&
10130 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10131 goto done;
10132#ifdef DEBUG_PUSH
10133 xmlGenericError(xmlGenericErrorContext,
10134 "PP: Parsing PI\n");
10135#endif
10136 xmlParsePI(ctxt);
10137 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010138 (ctxt->input->cur[2] == '-') &&
10139 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010140 if ((!terminate) &&
10141 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10142 goto done;
10143#ifdef DEBUG_PUSH
10144 xmlGenericError(xmlGenericErrorContext,
10145 "PP: Parsing Comment\n");
10146#endif
10147 xmlParseComment(ctxt);
10148 ctxt->instate = XML_PARSER_MISC;
10149 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010150 (ctxt->input->cur[2] == 'D') &&
10151 (ctxt->input->cur[3] == 'O') &&
10152 (ctxt->input->cur[4] == 'C') &&
10153 (ctxt->input->cur[5] == 'T') &&
10154 (ctxt->input->cur[6] == 'Y') &&
10155 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010156 (ctxt->input->cur[8] == 'E')) {
10157 if ((!terminate) &&
10158 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10159 goto done;
10160#ifdef DEBUG_PUSH
10161 xmlGenericError(xmlGenericErrorContext,
10162 "PP: Parsing internal subset\n");
10163#endif
10164 ctxt->inSubset = 1;
10165 xmlParseDocTypeDecl(ctxt);
10166 if (RAW == '[') {
10167 ctxt->instate = XML_PARSER_DTD;
10168#ifdef DEBUG_PUSH
10169 xmlGenericError(xmlGenericErrorContext,
10170 "PP: entering DTD\n");
10171#endif
10172 } else {
10173 /*
10174 * Create and update the external subset.
10175 */
10176 ctxt->inSubset = 2;
10177 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10178 (ctxt->sax->externalSubset != NULL))
10179 ctxt->sax->externalSubset(ctxt->userData,
10180 ctxt->intSubName, ctxt->extSubSystem,
10181 ctxt->extSubURI);
10182 ctxt->inSubset = 0;
10183 ctxt->instate = XML_PARSER_PROLOG;
10184#ifdef DEBUG_PUSH
10185 xmlGenericError(xmlGenericErrorContext,
10186 "PP: entering PROLOG\n");
10187#endif
10188 }
10189 } else if ((cur == '<') && (next == '!') &&
10190 (avail < 9)) {
10191 goto done;
10192 } else {
10193 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010194 ctxt->progressive = 1;
10195 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010196#ifdef DEBUG_PUSH
10197 xmlGenericError(xmlGenericErrorContext,
10198 "PP: entering START_TAG\n");
10199#endif
10200 }
10201 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010202 case XML_PARSER_PROLOG:
10203 SKIP_BLANKS;
10204 if (ctxt->input->buf == NULL)
10205 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10206 else
10207 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10208 if (avail < 2)
10209 goto done;
10210 cur = ctxt->input->cur[0];
10211 next = ctxt->input->cur[1];
10212 if ((cur == '<') && (next == '?')) {
10213 if ((!terminate) &&
10214 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10215 goto done;
10216#ifdef DEBUG_PUSH
10217 xmlGenericError(xmlGenericErrorContext,
10218 "PP: Parsing PI\n");
10219#endif
10220 xmlParsePI(ctxt);
10221 } else if ((cur == '<') && (next == '!') &&
10222 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10223 if ((!terminate) &&
10224 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10225 goto done;
10226#ifdef DEBUG_PUSH
10227 xmlGenericError(xmlGenericErrorContext,
10228 "PP: Parsing Comment\n");
10229#endif
10230 xmlParseComment(ctxt);
10231 ctxt->instate = XML_PARSER_PROLOG;
10232 } else if ((cur == '<') && (next == '!') &&
10233 (avail < 4)) {
10234 goto done;
10235 } else {
10236 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010237 if (ctxt->progressive == 0)
10238 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010239 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010240#ifdef DEBUG_PUSH
10241 xmlGenericError(xmlGenericErrorContext,
10242 "PP: entering START_TAG\n");
10243#endif
10244 }
10245 break;
10246 case XML_PARSER_EPILOG:
10247 SKIP_BLANKS;
10248 if (ctxt->input->buf == NULL)
10249 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10250 else
10251 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10252 if (avail < 2)
10253 goto done;
10254 cur = ctxt->input->cur[0];
10255 next = ctxt->input->cur[1];
10256 if ((cur == '<') && (next == '?')) {
10257 if ((!terminate) &&
10258 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10259 goto done;
10260#ifdef DEBUG_PUSH
10261 xmlGenericError(xmlGenericErrorContext,
10262 "PP: Parsing PI\n");
10263#endif
10264 xmlParsePI(ctxt);
10265 ctxt->instate = XML_PARSER_EPILOG;
10266 } else if ((cur == '<') && (next == '!') &&
10267 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10268 if ((!terminate) &&
10269 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10270 goto done;
10271#ifdef DEBUG_PUSH
10272 xmlGenericError(xmlGenericErrorContext,
10273 "PP: Parsing Comment\n");
10274#endif
10275 xmlParseComment(ctxt);
10276 ctxt->instate = XML_PARSER_EPILOG;
10277 } else if ((cur == '<') && (next == '!') &&
10278 (avail < 4)) {
10279 goto done;
10280 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010281 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010282 ctxt->instate = XML_PARSER_EOF;
10283#ifdef DEBUG_PUSH
10284 xmlGenericError(xmlGenericErrorContext,
10285 "PP: entering EOF\n");
10286#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010287 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010288 ctxt->sax->endDocument(ctxt->userData);
10289 goto done;
10290 }
10291 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010292 case XML_PARSER_DTD: {
10293 /*
10294 * Sorry but progressive parsing of the internal subset
10295 * is not expected to be supported. We first check that
10296 * the full content of the internal subset is available and
10297 * the parsing is launched only at that point.
10298 * Internal subset ends up with "']' S? '>'" in an unescaped
10299 * section and not in a ']]>' sequence which are conditional
10300 * sections (whoever argued to keep that crap in XML deserve
10301 * a place in hell !).
10302 */
10303 int base, i;
10304 xmlChar *buf;
10305 xmlChar quote = 0;
10306
10307 base = ctxt->input->cur - ctxt->input->base;
10308 if (base < 0) return(0);
10309 if (ctxt->checkIndex > base)
10310 base = ctxt->checkIndex;
10311 buf = ctxt->input->buf->buffer->content;
10312 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10313 base++) {
10314 if (quote != 0) {
10315 if (buf[base] == quote)
10316 quote = 0;
10317 continue;
10318 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010319 if ((quote == 0) && (buf[base] == '<')) {
10320 int found = 0;
10321 /* special handling of comments */
10322 if (((unsigned int) base + 4 <
10323 ctxt->input->buf->buffer->use) &&
10324 (buf[base + 1] == '!') &&
10325 (buf[base + 2] == '-') &&
10326 (buf[base + 3] == '-')) {
10327 for (;(unsigned int) base + 3 <
10328 ctxt->input->buf->buffer->use; base++) {
10329 if ((buf[base] == '-') &&
10330 (buf[base + 1] == '-') &&
10331 (buf[base + 2] == '>')) {
10332 found = 1;
10333 base += 2;
10334 break;
10335 }
10336 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010337 if (!found) {
10338#if 0
10339 fprintf(stderr, "unfinished comment\n");
10340#endif
10341 break; /* for */
10342 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010343 continue;
10344 }
10345 }
Owen Taylor3473f882001-02-23 17:55:21 +000010346 if (buf[base] == '"') {
10347 quote = '"';
10348 continue;
10349 }
10350 if (buf[base] == '\'') {
10351 quote = '\'';
10352 continue;
10353 }
10354 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010355#if 0
10356 fprintf(stderr, "%c%c%c%c: ", buf[base],
10357 buf[base + 1], buf[base + 2], buf[base + 3]);
10358#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010359 if ((unsigned int) base +1 >=
10360 ctxt->input->buf->buffer->use)
10361 break;
10362 if (buf[base + 1] == ']') {
10363 /* conditional crap, skip both ']' ! */
10364 base++;
10365 continue;
10366 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010367 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010368 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10369 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010370 if (buf[base + i] == '>') {
10371#if 0
10372 fprintf(stderr, "found\n");
10373#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010374 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010375 }
10376 if (!IS_BLANK_CH(buf[base + i])) {
10377#if 0
10378 fprintf(stderr, "not found\n");
10379#endif
10380 goto not_end_of_int_subset;
10381 }
Owen Taylor3473f882001-02-23 17:55:21 +000010382 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010383#if 0
10384 fprintf(stderr, "end of stream\n");
10385#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010386 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010387
Owen Taylor3473f882001-02-23 17:55:21 +000010388 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010389not_end_of_int_subset:
10390 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010391 }
10392 /*
10393 * We didn't found the end of the Internal subset
10394 */
Owen Taylor3473f882001-02-23 17:55:21 +000010395#ifdef DEBUG_PUSH
10396 if (next == 0)
10397 xmlGenericError(xmlGenericErrorContext,
10398 "PP: lookup of int subset end filed\n");
10399#endif
10400 goto done;
10401
10402found_end_int_subset:
10403 xmlParseInternalSubset(ctxt);
10404 ctxt->inSubset = 2;
10405 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10406 (ctxt->sax->externalSubset != NULL))
10407 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10408 ctxt->extSubSystem, ctxt->extSubURI);
10409 ctxt->inSubset = 0;
10410 ctxt->instate = XML_PARSER_PROLOG;
10411 ctxt->checkIndex = 0;
10412#ifdef DEBUG_PUSH
10413 xmlGenericError(xmlGenericErrorContext,
10414 "PP: entering PROLOG\n");
10415#endif
10416 break;
10417 }
10418 case XML_PARSER_COMMENT:
10419 xmlGenericError(xmlGenericErrorContext,
10420 "PP: internal error, state == COMMENT\n");
10421 ctxt->instate = XML_PARSER_CONTENT;
10422#ifdef DEBUG_PUSH
10423 xmlGenericError(xmlGenericErrorContext,
10424 "PP: entering CONTENT\n");
10425#endif
10426 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010427 case XML_PARSER_IGNORE:
10428 xmlGenericError(xmlGenericErrorContext,
10429 "PP: internal error, state == IGNORE");
10430 ctxt->instate = XML_PARSER_DTD;
10431#ifdef DEBUG_PUSH
10432 xmlGenericError(xmlGenericErrorContext,
10433 "PP: entering DTD\n");
10434#endif
10435 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010436 case XML_PARSER_PI:
10437 xmlGenericError(xmlGenericErrorContext,
10438 "PP: internal error, state == PI\n");
10439 ctxt->instate = XML_PARSER_CONTENT;
10440#ifdef DEBUG_PUSH
10441 xmlGenericError(xmlGenericErrorContext,
10442 "PP: entering CONTENT\n");
10443#endif
10444 break;
10445 case XML_PARSER_ENTITY_DECL:
10446 xmlGenericError(xmlGenericErrorContext,
10447 "PP: internal error, state == ENTITY_DECL\n");
10448 ctxt->instate = XML_PARSER_DTD;
10449#ifdef DEBUG_PUSH
10450 xmlGenericError(xmlGenericErrorContext,
10451 "PP: entering DTD\n");
10452#endif
10453 break;
10454 case XML_PARSER_ENTITY_VALUE:
10455 xmlGenericError(xmlGenericErrorContext,
10456 "PP: internal error, state == ENTITY_VALUE\n");
10457 ctxt->instate = XML_PARSER_CONTENT;
10458#ifdef DEBUG_PUSH
10459 xmlGenericError(xmlGenericErrorContext,
10460 "PP: entering DTD\n");
10461#endif
10462 break;
10463 case XML_PARSER_ATTRIBUTE_VALUE:
10464 xmlGenericError(xmlGenericErrorContext,
10465 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10466 ctxt->instate = XML_PARSER_START_TAG;
10467#ifdef DEBUG_PUSH
10468 xmlGenericError(xmlGenericErrorContext,
10469 "PP: entering START_TAG\n");
10470#endif
10471 break;
10472 case XML_PARSER_SYSTEM_LITERAL:
10473 xmlGenericError(xmlGenericErrorContext,
10474 "PP: internal error, state == SYSTEM_LITERAL\n");
10475 ctxt->instate = XML_PARSER_START_TAG;
10476#ifdef DEBUG_PUSH
10477 xmlGenericError(xmlGenericErrorContext,
10478 "PP: entering START_TAG\n");
10479#endif
10480 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010481 case XML_PARSER_PUBLIC_LITERAL:
10482 xmlGenericError(xmlGenericErrorContext,
10483 "PP: internal error, state == PUBLIC_LITERAL\n");
10484 ctxt->instate = XML_PARSER_START_TAG;
10485#ifdef DEBUG_PUSH
10486 xmlGenericError(xmlGenericErrorContext,
10487 "PP: entering START_TAG\n");
10488#endif
10489 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010490 }
10491 }
10492done:
10493#ifdef DEBUG_PUSH
10494 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10495#endif
10496 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010497encoding_error:
10498 {
10499 char buffer[150];
10500
10501 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10502 ctxt->input->cur[0], ctxt->input->cur[1],
10503 ctxt->input->cur[2], ctxt->input->cur[3]);
10504 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10505 "Input is not proper UTF-8, indicate encoding !\n%s",
10506 BAD_CAST buffer, NULL);
10507 }
10508 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010509}
10510
10511/**
Owen Taylor3473f882001-02-23 17:55:21 +000010512 * xmlParseChunk:
10513 * @ctxt: an XML parser context
10514 * @chunk: an char array
10515 * @size: the size in byte of the chunk
10516 * @terminate: last chunk indicator
10517 *
10518 * Parse a Chunk of memory
10519 *
10520 * Returns zero if no error, the xmlParserErrors otherwise.
10521 */
10522int
10523xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10524 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010525 int end_in_lf = 0;
10526
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010527 if (ctxt == NULL)
10528 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010529 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010530 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010531 if (ctxt->instate == XML_PARSER_START)
10532 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010533 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10534 (chunk[size - 1] == '\r')) {
10535 end_in_lf = 1;
10536 size--;
10537 }
Owen Taylor3473f882001-02-23 17:55:21 +000010538 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10539 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10540 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10541 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010542 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010543
William M. Bracka3215c72004-07-31 16:24:01 +000010544 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10545 if (res < 0) {
10546 ctxt->errNo = XML_PARSER_EOF;
10547 ctxt->disableSAX = 1;
10548 return (XML_PARSER_EOF);
10549 }
Owen Taylor3473f882001-02-23 17:55:21 +000010550 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10551 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010552 ctxt->input->end =
10553 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010554#ifdef DEBUG_PUSH
10555 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10556#endif
10557
Owen Taylor3473f882001-02-23 17:55:21 +000010558 } else if (ctxt->instate != XML_PARSER_EOF) {
10559 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10560 xmlParserInputBufferPtr in = ctxt->input->buf;
10561 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10562 (in->raw != NULL)) {
10563 int nbchars;
10564
10565 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10566 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010567 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010568 xmlGenericError(xmlGenericErrorContext,
10569 "xmlParseChunk: encoder error\n");
10570 return(XML_ERR_INVALID_ENCODING);
10571 }
10572 }
10573 }
10574 }
10575 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010576 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10577 (ctxt->input->buf != NULL)) {
10578 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10579 }
Daniel Veillard14412512005-01-21 23:53:26 +000010580 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010581 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010582 if (terminate) {
10583 /*
10584 * Check for termination
10585 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010586 int avail = 0;
10587
10588 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010589 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010590 avail = ctxt->input->length -
10591 (ctxt->input->cur - ctxt->input->base);
10592 else
10593 avail = ctxt->input->buf->buffer->use -
10594 (ctxt->input->cur - ctxt->input->base);
10595 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010596
Owen Taylor3473f882001-02-23 17:55:21 +000010597 if ((ctxt->instate != XML_PARSER_EOF) &&
10598 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010599 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010600 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010601 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010602 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010603 }
Owen Taylor3473f882001-02-23 17:55:21 +000010604 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010605 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010606 ctxt->sax->endDocument(ctxt->userData);
10607 }
10608 ctxt->instate = XML_PARSER_EOF;
10609 }
10610 return((xmlParserErrors) ctxt->errNo);
10611}
10612
10613/************************************************************************
10614 * *
10615 * I/O front end functions to the parser *
10616 * *
10617 ************************************************************************/
10618
10619/**
Owen Taylor3473f882001-02-23 17:55:21 +000010620 * xmlCreatePushParserCtxt:
10621 * @sax: a SAX handler
10622 * @user_data: The user data returned on SAX callbacks
10623 * @chunk: a pointer to an array of chars
10624 * @size: number of chars in the array
10625 * @filename: an optional file name or URI
10626 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010627 * Create a parser context for using the XML parser in push mode.
10628 * If @buffer and @size are non-NULL, the data is used to detect
10629 * the encoding. The remaining characters will be parsed so they
10630 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010631 * To allow content encoding detection, @size should be >= 4
10632 * The value of @filename is used for fetching external entities
10633 * and error/warning reports.
10634 *
10635 * Returns the new parser context or NULL
10636 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010637
Owen Taylor3473f882001-02-23 17:55:21 +000010638xmlParserCtxtPtr
10639xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10640 const char *chunk, int size, const char *filename) {
10641 xmlParserCtxtPtr ctxt;
10642 xmlParserInputPtr inputStream;
10643 xmlParserInputBufferPtr buf;
10644 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10645
10646 /*
10647 * plug some encoding conversion routines
10648 */
10649 if ((chunk != NULL) && (size >= 4))
10650 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10651
10652 buf = xmlAllocParserInputBuffer(enc);
10653 if (buf == NULL) return(NULL);
10654
10655 ctxt = xmlNewParserCtxt();
10656 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010657 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010658 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010659 return(NULL);
10660 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010661 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010662 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10663 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010664 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010665 xmlFreeParserInputBuffer(buf);
10666 xmlFreeParserCtxt(ctxt);
10667 return(NULL);
10668 }
Owen Taylor3473f882001-02-23 17:55:21 +000010669 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010670#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010671 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010672#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010673 xmlFree(ctxt->sax);
10674 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10675 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010676 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010677 xmlFreeParserInputBuffer(buf);
10678 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010679 return(NULL);
10680 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010681 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10682 if (sax->initialized == XML_SAX2_MAGIC)
10683 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10684 else
10685 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010686 if (user_data != NULL)
10687 ctxt->userData = user_data;
10688 }
10689 if (filename == NULL) {
10690 ctxt->directory = NULL;
10691 } else {
10692 ctxt->directory = xmlParserGetDirectory(filename);
10693 }
10694
10695 inputStream = xmlNewInputStream(ctxt);
10696 if (inputStream == NULL) {
10697 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010698 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010699 return(NULL);
10700 }
10701
10702 if (filename == NULL)
10703 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010704 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010705 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010706 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010707 if (inputStream->filename == NULL) {
10708 xmlFreeParserCtxt(ctxt);
10709 xmlFreeParserInputBuffer(buf);
10710 return(NULL);
10711 }
10712 }
Owen Taylor3473f882001-02-23 17:55:21 +000010713 inputStream->buf = buf;
10714 inputStream->base = inputStream->buf->buffer->content;
10715 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010716 inputStream->end =
10717 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010718
10719 inputPush(ctxt, inputStream);
10720
William M. Brack3a1cd212005-02-11 14:35:54 +000010721 /*
10722 * If the caller didn't provide an initial 'chunk' for determining
10723 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10724 * that it can be automatically determined later
10725 */
10726 if ((size == 0) || (chunk == NULL)) {
10727 ctxt->charset = XML_CHAR_ENCODING_NONE;
10728 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010729 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10730 int cur = ctxt->input->cur - ctxt->input->base;
10731
Owen Taylor3473f882001-02-23 17:55:21 +000010732 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010733
10734 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10735 ctxt->input->cur = ctxt->input->base + cur;
10736 ctxt->input->end =
10737 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010738#ifdef DEBUG_PUSH
10739 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10740#endif
10741 }
10742
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010743 if (enc != XML_CHAR_ENCODING_NONE) {
10744 xmlSwitchEncoding(ctxt, enc);
10745 }
10746
Owen Taylor3473f882001-02-23 17:55:21 +000010747 return(ctxt);
10748}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010749#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010750
10751/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010752 * xmlStopParser:
10753 * @ctxt: an XML parser context
10754 *
10755 * Blocks further parser processing
10756 */
10757void
10758xmlStopParser(xmlParserCtxtPtr ctxt) {
10759 if (ctxt == NULL)
10760 return;
10761 ctxt->instate = XML_PARSER_EOF;
10762 ctxt->disableSAX = 1;
10763 if (ctxt->input != NULL) {
10764 ctxt->input->cur = BAD_CAST"";
10765 ctxt->input->base = ctxt->input->cur;
10766 }
10767}
10768
10769/**
Owen Taylor3473f882001-02-23 17:55:21 +000010770 * xmlCreateIOParserCtxt:
10771 * @sax: a SAX handler
10772 * @user_data: The user data returned on SAX callbacks
10773 * @ioread: an I/O read function
10774 * @ioclose: an I/O close function
10775 * @ioctx: an I/O handler
10776 * @enc: the charset encoding if known
10777 *
10778 * Create a parser context for using the XML parser with an existing
10779 * I/O stream
10780 *
10781 * Returns the new parser context or NULL
10782 */
10783xmlParserCtxtPtr
10784xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10785 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10786 void *ioctx, xmlCharEncoding enc) {
10787 xmlParserCtxtPtr ctxt;
10788 xmlParserInputPtr inputStream;
10789 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010790
10791 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010792
10793 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10794 if (buf == NULL) return(NULL);
10795
10796 ctxt = xmlNewParserCtxt();
10797 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010798 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010799 return(NULL);
10800 }
10801 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010802#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010803 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010804#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010805 xmlFree(ctxt->sax);
10806 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10807 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010808 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010809 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010810 return(NULL);
10811 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010812 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10813 if (sax->initialized == XML_SAX2_MAGIC)
10814 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10815 else
10816 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010817 if (user_data != NULL)
10818 ctxt->userData = user_data;
10819 }
10820
10821 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10822 if (inputStream == NULL) {
10823 xmlFreeParserCtxt(ctxt);
10824 return(NULL);
10825 }
10826 inputPush(ctxt, inputStream);
10827
10828 return(ctxt);
10829}
10830
Daniel Veillard4432df22003-09-28 18:58:27 +000010831#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010832/************************************************************************
10833 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010834 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010835 * *
10836 ************************************************************************/
10837
10838/**
10839 * xmlIOParseDTD:
10840 * @sax: the SAX handler block or NULL
10841 * @input: an Input Buffer
10842 * @enc: the charset encoding if known
10843 *
10844 * Load and parse a DTD
10845 *
10846 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010847 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010848 */
10849
10850xmlDtdPtr
10851xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10852 xmlCharEncoding enc) {
10853 xmlDtdPtr ret = NULL;
10854 xmlParserCtxtPtr ctxt;
10855 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010856 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010857
10858 if (input == NULL)
10859 return(NULL);
10860
10861 ctxt = xmlNewParserCtxt();
10862 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010863 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010864 return(NULL);
10865 }
10866
10867 /*
10868 * Set-up the SAX context
10869 */
10870 if (sax != NULL) {
10871 if (ctxt->sax != NULL)
10872 xmlFree(ctxt->sax);
10873 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010874 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010875 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010876 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010877
10878 /*
10879 * generate a parser input from the I/O handler
10880 */
10881
Daniel Veillard43caefb2003-12-07 19:32:22 +000010882 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010883 if (pinput == NULL) {
10884 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010885 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010886 xmlFreeParserCtxt(ctxt);
10887 return(NULL);
10888 }
10889
10890 /*
10891 * plug some encoding conversion routines here.
10892 */
10893 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010894 if (enc != XML_CHAR_ENCODING_NONE) {
10895 xmlSwitchEncoding(ctxt, enc);
10896 }
Owen Taylor3473f882001-02-23 17:55:21 +000010897
10898 pinput->filename = NULL;
10899 pinput->line = 1;
10900 pinput->col = 1;
10901 pinput->base = ctxt->input->cur;
10902 pinput->cur = ctxt->input->cur;
10903 pinput->free = NULL;
10904
10905 /*
10906 * let's parse that entity knowing it's an external subset.
10907 */
10908 ctxt->inSubset = 2;
10909 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10910 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10911 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010912
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010913 if ((enc == XML_CHAR_ENCODING_NONE) &&
10914 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010915 /*
10916 * Get the 4 first bytes and decode the charset
10917 * if enc != XML_CHAR_ENCODING_NONE
10918 * plug some encoding conversion routines.
10919 */
10920 start[0] = RAW;
10921 start[1] = NXT(1);
10922 start[2] = NXT(2);
10923 start[3] = NXT(3);
10924 enc = xmlDetectCharEncoding(start, 4);
10925 if (enc != XML_CHAR_ENCODING_NONE) {
10926 xmlSwitchEncoding(ctxt, enc);
10927 }
10928 }
10929
Owen Taylor3473f882001-02-23 17:55:21 +000010930 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10931
10932 if (ctxt->myDoc != NULL) {
10933 if (ctxt->wellFormed) {
10934 ret = ctxt->myDoc->extSubset;
10935 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010936 if (ret != NULL) {
10937 xmlNodePtr tmp;
10938
10939 ret->doc = NULL;
10940 tmp = ret->children;
10941 while (tmp != NULL) {
10942 tmp->doc = NULL;
10943 tmp = tmp->next;
10944 }
10945 }
Owen Taylor3473f882001-02-23 17:55:21 +000010946 } else {
10947 ret = NULL;
10948 }
10949 xmlFreeDoc(ctxt->myDoc);
10950 ctxt->myDoc = NULL;
10951 }
10952 if (sax != NULL) ctxt->sax = NULL;
10953 xmlFreeParserCtxt(ctxt);
10954
10955 return(ret);
10956}
10957
10958/**
10959 * xmlSAXParseDTD:
10960 * @sax: the SAX handler block
10961 * @ExternalID: a NAME* containing the External ID of the DTD
10962 * @SystemID: a NAME* containing the URL to the DTD
10963 *
10964 * Load and parse an external subset.
10965 *
10966 * Returns the resulting xmlDtdPtr or NULL in case of error.
10967 */
10968
10969xmlDtdPtr
10970xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10971 const xmlChar *SystemID) {
10972 xmlDtdPtr ret = NULL;
10973 xmlParserCtxtPtr ctxt;
10974 xmlParserInputPtr input = NULL;
10975 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010976 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010977
10978 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10979
10980 ctxt = xmlNewParserCtxt();
10981 if (ctxt == NULL) {
10982 return(NULL);
10983 }
10984
10985 /*
10986 * Set-up the SAX context
10987 */
10988 if (sax != NULL) {
10989 if (ctxt->sax != NULL)
10990 xmlFree(ctxt->sax);
10991 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010992 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010993 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010994
10995 /*
10996 * Canonicalise the system ID
10997 */
10998 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010999 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011000 xmlFreeParserCtxt(ctxt);
11001 return(NULL);
11002 }
Owen Taylor3473f882001-02-23 17:55:21 +000011003
11004 /*
11005 * Ask the Entity resolver to load the damn thing
11006 */
11007
11008 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011009 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11010 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011011 if (input == NULL) {
11012 if (sax != NULL) ctxt->sax = NULL;
11013 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011014 if (systemIdCanonic != NULL)
11015 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011016 return(NULL);
11017 }
11018
11019 /*
11020 * plug some encoding conversion routines here.
11021 */
11022 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011023 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11024 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11025 xmlSwitchEncoding(ctxt, enc);
11026 }
Owen Taylor3473f882001-02-23 17:55:21 +000011027
11028 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011029 input->filename = (char *) systemIdCanonic;
11030 else
11031 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011032 input->line = 1;
11033 input->col = 1;
11034 input->base = ctxt->input->cur;
11035 input->cur = ctxt->input->cur;
11036 input->free = NULL;
11037
11038 /*
11039 * let's parse that entity knowing it's an external subset.
11040 */
11041 ctxt->inSubset = 2;
11042 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11043 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11044 ExternalID, SystemID);
11045 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11046
11047 if (ctxt->myDoc != NULL) {
11048 if (ctxt->wellFormed) {
11049 ret = ctxt->myDoc->extSubset;
11050 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011051 if (ret != NULL) {
11052 xmlNodePtr tmp;
11053
11054 ret->doc = NULL;
11055 tmp = ret->children;
11056 while (tmp != NULL) {
11057 tmp->doc = NULL;
11058 tmp = tmp->next;
11059 }
11060 }
Owen Taylor3473f882001-02-23 17:55:21 +000011061 } else {
11062 ret = NULL;
11063 }
11064 xmlFreeDoc(ctxt->myDoc);
11065 ctxt->myDoc = NULL;
11066 }
11067 if (sax != NULL) ctxt->sax = NULL;
11068 xmlFreeParserCtxt(ctxt);
11069
11070 return(ret);
11071}
11072
Daniel Veillard4432df22003-09-28 18:58:27 +000011073
Owen Taylor3473f882001-02-23 17:55:21 +000011074/**
11075 * xmlParseDTD:
11076 * @ExternalID: a NAME* containing the External ID of the DTD
11077 * @SystemID: a NAME* containing the URL to the DTD
11078 *
11079 * Load and parse an external subset.
11080 *
11081 * Returns the resulting xmlDtdPtr or NULL in case of error.
11082 */
11083
11084xmlDtdPtr
11085xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11086 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11087}
Daniel Veillard4432df22003-09-28 18:58:27 +000011088#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011089
11090/************************************************************************
11091 * *
11092 * Front ends when parsing an Entity *
11093 * *
11094 ************************************************************************/
11095
11096/**
Owen Taylor3473f882001-02-23 17:55:21 +000011097 * xmlParseCtxtExternalEntity:
11098 * @ctx: the existing parsing context
11099 * @URL: the URL for the entity to load
11100 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011101 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011102 *
11103 * Parse an external general entity within an existing parsing context
11104 * An external general parsed entity is well-formed if it matches the
11105 * production labeled extParsedEnt.
11106 *
11107 * [78] extParsedEnt ::= TextDecl? content
11108 *
11109 * Returns 0 if the entity is well formed, -1 in case of args problem and
11110 * the parser error code otherwise
11111 */
11112
11113int
11114xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011115 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011116 xmlParserCtxtPtr ctxt;
11117 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011118 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011119 xmlSAXHandlerPtr oldsax = NULL;
11120 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011121 xmlChar start[4];
11122 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011123 xmlParserInputPtr inputStream;
11124 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011125
Daniel Veillardce682bc2004-11-05 17:22:25 +000011126 if (ctx == NULL) return(-1);
11127
Owen Taylor3473f882001-02-23 17:55:21 +000011128 if (ctx->depth > 40) {
11129 return(XML_ERR_ENTITY_LOOP);
11130 }
11131
Daniel Veillardcda96922001-08-21 10:56:31 +000011132 if (lst != NULL)
11133 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011134 if ((URL == NULL) && (ID == NULL))
11135 return(-1);
11136 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11137 return(-1);
11138
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011139 ctxt = xmlNewParserCtxt();
11140 if (ctxt == NULL) {
11141 return(-1);
11142 }
11143
Owen Taylor3473f882001-02-23 17:55:21 +000011144 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011145 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011146
11147 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11148 if (inputStream == NULL) {
11149 xmlFreeParserCtxt(ctxt);
11150 return(-1);
11151 }
11152
11153 inputPush(ctxt, inputStream);
11154
11155 if ((ctxt->directory == NULL) && (directory == NULL))
11156 directory = xmlParserGetDirectory((char *)URL);
11157 if ((ctxt->directory == NULL) && (directory != NULL))
11158 ctxt->directory = directory;
11159
Owen Taylor3473f882001-02-23 17:55:21 +000011160 oldsax = ctxt->sax;
11161 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011162 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011163 newDoc = xmlNewDoc(BAD_CAST "1.0");
11164 if (newDoc == NULL) {
11165 xmlFreeParserCtxt(ctxt);
11166 return(-1);
11167 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011168 if (ctx->myDoc->dict) {
11169 newDoc->dict = ctx->myDoc->dict;
11170 xmlDictReference(newDoc->dict);
11171 }
Owen Taylor3473f882001-02-23 17:55:21 +000011172 if (ctx->myDoc != NULL) {
11173 newDoc->intSubset = ctx->myDoc->intSubset;
11174 newDoc->extSubset = ctx->myDoc->extSubset;
11175 }
11176 if (ctx->myDoc->URL != NULL) {
11177 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11178 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011179 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11180 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011181 ctxt->sax = oldsax;
11182 xmlFreeParserCtxt(ctxt);
11183 newDoc->intSubset = NULL;
11184 newDoc->extSubset = NULL;
11185 xmlFreeDoc(newDoc);
11186 return(-1);
11187 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011188 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011189 nodePush(ctxt, newDoc->children);
11190 if (ctx->myDoc == NULL) {
11191 ctxt->myDoc = newDoc;
11192 } else {
11193 ctxt->myDoc = ctx->myDoc;
11194 newDoc->children->doc = ctx->myDoc;
11195 }
11196
Daniel Veillard87a764e2001-06-20 17:41:10 +000011197 /*
11198 * Get the 4 first bytes and decode the charset
11199 * if enc != XML_CHAR_ENCODING_NONE
11200 * plug some encoding conversion routines.
11201 */
11202 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011203 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11204 start[0] = RAW;
11205 start[1] = NXT(1);
11206 start[2] = NXT(2);
11207 start[3] = NXT(3);
11208 enc = xmlDetectCharEncoding(start, 4);
11209 if (enc != XML_CHAR_ENCODING_NONE) {
11210 xmlSwitchEncoding(ctxt, enc);
11211 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011212 }
11213
Owen Taylor3473f882001-02-23 17:55:21 +000011214 /*
11215 * Parse a possible text declaration first
11216 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011217 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011218 xmlParseTextDecl(ctxt);
11219 }
11220
11221 /*
11222 * Doing validity checking on chunk doesn't make sense
11223 */
11224 ctxt->instate = XML_PARSER_CONTENT;
11225 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011226 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011227 ctxt->loadsubset = ctx->loadsubset;
11228 ctxt->depth = ctx->depth + 1;
11229 ctxt->replaceEntities = ctx->replaceEntities;
11230 if (ctxt->validate) {
11231 ctxt->vctxt.error = ctx->vctxt.error;
11232 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011233 } else {
11234 ctxt->vctxt.error = NULL;
11235 ctxt->vctxt.warning = NULL;
11236 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011237 ctxt->vctxt.nodeTab = NULL;
11238 ctxt->vctxt.nodeNr = 0;
11239 ctxt->vctxt.nodeMax = 0;
11240 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011241 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11242 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011243 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11244 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11245 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011246 ctxt->dictNames = ctx->dictNames;
11247 ctxt->attsDefault = ctx->attsDefault;
11248 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011249 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011250
11251 xmlParseContent(ctxt);
11252
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011253 ctx->validate = ctxt->validate;
11254 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011255 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011256 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011257 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011258 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011259 }
11260 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011261 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011262 }
11263
11264 if (!ctxt->wellFormed) {
11265 if (ctxt->errNo == 0)
11266 ret = 1;
11267 else
11268 ret = ctxt->errNo;
11269 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011270 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011271 xmlNodePtr cur;
11272
11273 /*
11274 * Return the newly created nodeset after unlinking it from
11275 * the pseudo parent.
11276 */
11277 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011278 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011279 while (cur != NULL) {
11280 cur->parent = NULL;
11281 cur = cur->next;
11282 }
11283 newDoc->children->children = NULL;
11284 }
11285 ret = 0;
11286 }
11287 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011288 ctxt->dict = NULL;
11289 ctxt->attsDefault = NULL;
11290 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011291 xmlFreeParserCtxt(ctxt);
11292 newDoc->intSubset = NULL;
11293 newDoc->extSubset = NULL;
11294 xmlFreeDoc(newDoc);
11295
11296 return(ret);
11297}
11298
11299/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011300 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011301 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011302 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011303 * @sax: the SAX handler bloc (possibly NULL)
11304 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11305 * @depth: Used for loop detection, use 0
11306 * @URL: the URL for the entity to load
11307 * @ID: the System ID for the entity to load
11308 * @list: the return value for the set of parsed nodes
11309 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011310 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011311 *
11312 * Returns 0 if the entity is well formed, -1 in case of args problem and
11313 * the parser error code otherwise
11314 */
11315
Daniel Veillard7d515752003-09-26 19:12:37 +000011316static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011317xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11318 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011319 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011320 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011321 xmlParserCtxtPtr ctxt;
11322 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011323 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011324 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011325 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011326 xmlChar start[4];
11327 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011328
11329 if (depth > 40) {
11330 return(XML_ERR_ENTITY_LOOP);
11331 }
11332
11333
11334
11335 if (list != NULL)
11336 *list = NULL;
11337 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011338 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011339 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011340 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011341
11342
11343 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011344 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011345 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011346 if (oldctxt != NULL) {
11347 ctxt->_private = oldctxt->_private;
11348 ctxt->loadsubset = oldctxt->loadsubset;
11349 ctxt->validate = oldctxt->validate;
11350 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011351 ctxt->record_info = oldctxt->record_info;
11352 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11353 ctxt->node_seq.length = oldctxt->node_seq.length;
11354 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011355 } else {
11356 /*
11357 * Doing validity checking on chunk without context
11358 * doesn't make sense
11359 */
11360 ctxt->_private = NULL;
11361 ctxt->validate = 0;
11362 ctxt->external = 2;
11363 ctxt->loadsubset = 0;
11364 }
Owen Taylor3473f882001-02-23 17:55:21 +000011365 if (sax != NULL) {
11366 oldsax = ctxt->sax;
11367 ctxt->sax = sax;
11368 if (user_data != NULL)
11369 ctxt->userData = user_data;
11370 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011371 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011372 newDoc = xmlNewDoc(BAD_CAST "1.0");
11373 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011374 ctxt->node_seq.maximum = 0;
11375 ctxt->node_seq.length = 0;
11376 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011377 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011378 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011379 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011380 newDoc->intSubset = doc->intSubset;
11381 newDoc->extSubset = doc->extSubset;
11382 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011383 xmlDictReference(newDoc->dict);
11384
Owen Taylor3473f882001-02-23 17:55:21 +000011385 if (doc->URL != NULL) {
11386 newDoc->URL = xmlStrdup(doc->URL);
11387 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011388 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11389 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011390 if (sax != NULL)
11391 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011392 ctxt->node_seq.maximum = 0;
11393 ctxt->node_seq.length = 0;
11394 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011395 xmlFreeParserCtxt(ctxt);
11396 newDoc->intSubset = NULL;
11397 newDoc->extSubset = NULL;
11398 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011399 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011400 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011401 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011402 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011403 ctxt->myDoc = doc;
11404 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011405
Daniel Veillard87a764e2001-06-20 17:41:10 +000011406 /*
11407 * Get the 4 first bytes and decode the charset
11408 * if enc != XML_CHAR_ENCODING_NONE
11409 * plug some encoding conversion routines.
11410 */
11411 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011412 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11413 start[0] = RAW;
11414 start[1] = NXT(1);
11415 start[2] = NXT(2);
11416 start[3] = NXT(3);
11417 enc = xmlDetectCharEncoding(start, 4);
11418 if (enc != XML_CHAR_ENCODING_NONE) {
11419 xmlSwitchEncoding(ctxt, enc);
11420 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011421 }
11422
Owen Taylor3473f882001-02-23 17:55:21 +000011423 /*
11424 * Parse a possible text declaration first
11425 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011426 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011427 xmlParseTextDecl(ctxt);
11428 }
11429
Owen Taylor3473f882001-02-23 17:55:21 +000011430 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011431 ctxt->depth = depth;
11432
11433 xmlParseContent(ctxt);
11434
Daniel Veillard561b7f82002-03-20 21:55:57 +000011435 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011437 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011438 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011439 }
11440 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011442 }
11443
11444 if (!ctxt->wellFormed) {
11445 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011446 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011447 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011448 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011449 } else {
11450 if (list != NULL) {
11451 xmlNodePtr cur;
11452
11453 /*
11454 * Return the newly created nodeset after unlinking it from
11455 * they pseudo parent.
11456 */
11457 cur = newDoc->children->children;
11458 *list = cur;
11459 while (cur != NULL) {
11460 cur->parent = NULL;
11461 cur = cur->next;
11462 }
11463 newDoc->children->children = NULL;
11464 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011465 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011466 }
11467 if (sax != NULL)
11468 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011469 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11470 oldctxt->node_seq.length = ctxt->node_seq.length;
11471 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011472 ctxt->node_seq.maximum = 0;
11473 ctxt->node_seq.length = 0;
11474 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011475 xmlFreeParserCtxt(ctxt);
11476 newDoc->intSubset = NULL;
11477 newDoc->extSubset = NULL;
11478 xmlFreeDoc(newDoc);
11479
11480 return(ret);
11481}
11482
Daniel Veillard81273902003-09-30 00:43:48 +000011483#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011484/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011485 * xmlParseExternalEntity:
11486 * @doc: the document the chunk pertains to
11487 * @sax: the SAX handler bloc (possibly NULL)
11488 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11489 * @depth: Used for loop detection, use 0
11490 * @URL: the URL for the entity to load
11491 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011492 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011493 *
11494 * Parse an external general entity
11495 * An external general parsed entity is well-formed if it matches the
11496 * production labeled extParsedEnt.
11497 *
11498 * [78] extParsedEnt ::= TextDecl? content
11499 *
11500 * Returns 0 if the entity is well formed, -1 in case of args problem and
11501 * the parser error code otherwise
11502 */
11503
11504int
11505xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011506 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011507 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011508 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011509}
11510
11511/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011512 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011513 * @doc: the document the chunk pertains to
11514 * @sax: the SAX handler bloc (possibly NULL)
11515 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11516 * @depth: Used for loop detection, use 0
11517 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011518 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011519 *
11520 * Parse a well-balanced chunk of an XML document
11521 * called by the parser
11522 * The allowed sequence for the Well Balanced Chunk is the one defined by
11523 * the content production in the XML grammar:
11524 *
11525 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11526 *
11527 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11528 * the parser error code otherwise
11529 */
11530
11531int
11532xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011533 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011534 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11535 depth, string, lst, 0 );
11536}
Daniel Veillard81273902003-09-30 00:43:48 +000011537#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011538
11539/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011540 * xmlParseBalancedChunkMemoryInternal:
11541 * @oldctxt: the existing parsing context
11542 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11543 * @user_data: the user data field for the parser context
11544 * @lst: the return value for the set of parsed nodes
11545 *
11546 *
11547 * Parse a well-balanced chunk of an XML document
11548 * called by the parser
11549 * The allowed sequence for the Well Balanced Chunk is the one defined by
11550 * the content production in the XML grammar:
11551 *
11552 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11553 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011554 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11555 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011556 *
11557 * In case recover is set to 1, the nodelist will not be empty even if
11558 * the parsed chunk is not well balanced.
11559 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011560static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011561xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11562 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11563 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011564 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011565 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011566 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011567 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011568 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011569 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011570 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011571
11572 if (oldctxt->depth > 40) {
11573 return(XML_ERR_ENTITY_LOOP);
11574 }
11575
11576
11577 if (lst != NULL)
11578 *lst = NULL;
11579 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011580 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011581
11582 size = xmlStrlen(string);
11583
11584 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011585 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011586 if (user_data != NULL)
11587 ctxt->userData = user_data;
11588 else
11589 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011590 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11591 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011592 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11593 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11594 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011595
11596 oldsax = ctxt->sax;
11597 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011598 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011599 ctxt->replaceEntities = oldctxt->replaceEntities;
11600 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011601
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011602 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011603 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011604 newDoc = xmlNewDoc(BAD_CAST "1.0");
11605 if (newDoc == NULL) {
11606 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011607 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011608 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011609 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011610 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011611 newDoc->dict = ctxt->dict;
11612 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011613 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011614 } else {
11615 ctxt->myDoc = oldctxt->myDoc;
11616 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011617 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011618 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011619 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11620 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011621 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011622 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011623 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011624 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011625 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011626 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011627 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011628 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011629 ctxt->myDoc->children = NULL;
11630 ctxt->myDoc->last = NULL;
11631 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011632 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011633 ctxt->instate = XML_PARSER_CONTENT;
11634 ctxt->depth = oldctxt->depth + 1;
11635
Daniel Veillard328f48c2002-11-15 15:24:34 +000011636 ctxt->validate = 0;
11637 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011638 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11639 /*
11640 * ID/IDREF registration will be done in xmlValidateElement below
11641 */
11642 ctxt->loadsubset |= XML_SKIP_IDS;
11643 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011644 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011645 ctxt->attsDefault = oldctxt->attsDefault;
11646 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011647
Daniel Veillard68e9e742002-11-16 15:35:11 +000011648 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011649 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011651 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011652 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011653 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011654 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011655 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011656 }
11657
11658 if (!ctxt->wellFormed) {
11659 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011660 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011661 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011662 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011663 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011664 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011665 }
11666
William M. Brack7b9154b2003-09-27 19:23:50 +000011667 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011668 xmlNodePtr cur;
11669
11670 /*
11671 * Return the newly created nodeset after unlinking it from
11672 * they pseudo parent.
11673 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011674 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011675 *lst = cur;
11676 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011677#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011678 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11679 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11680 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011681 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11682 oldctxt->myDoc, cur);
11683 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011684#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011685 cur->parent = NULL;
11686 cur = cur->next;
11687 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011688 ctxt->myDoc->children->children = NULL;
11689 }
11690 if (ctxt->myDoc != NULL) {
11691 xmlFreeNode(ctxt->myDoc->children);
11692 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011693 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011694 }
11695
11696 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011697 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011698 ctxt->attsDefault = NULL;
11699 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011700 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011701 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011702 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011703 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011704
11705 return(ret);
11706}
11707
Daniel Veillard29b17482004-08-16 00:39:03 +000011708/**
11709 * xmlParseInNodeContext:
11710 * @node: the context node
11711 * @data: the input string
11712 * @datalen: the input string length in bytes
11713 * @options: a combination of xmlParserOption
11714 * @lst: the return value for the set of parsed nodes
11715 *
11716 * Parse a well-balanced chunk of an XML document
11717 * within the context (DTD, namespaces, etc ...) of the given node.
11718 *
11719 * The allowed sequence for the data is a Well Balanced Chunk defined by
11720 * the content production in the XML grammar:
11721 *
11722 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11723 *
11724 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11725 * error code otherwise
11726 */
11727xmlParserErrors
11728xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11729 int options, xmlNodePtr *lst) {
11730#ifdef SAX2
11731 xmlParserCtxtPtr ctxt;
11732 xmlDocPtr doc = NULL;
11733 xmlNodePtr fake, cur;
11734 int nsnr = 0;
11735
11736 xmlParserErrors ret = XML_ERR_OK;
11737
11738 /*
11739 * check all input parameters, grab the document
11740 */
11741 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11742 return(XML_ERR_INTERNAL_ERROR);
11743 switch (node->type) {
11744 case XML_ELEMENT_NODE:
11745 case XML_ATTRIBUTE_NODE:
11746 case XML_TEXT_NODE:
11747 case XML_CDATA_SECTION_NODE:
11748 case XML_ENTITY_REF_NODE:
11749 case XML_PI_NODE:
11750 case XML_COMMENT_NODE:
11751 case XML_DOCUMENT_NODE:
11752 case XML_HTML_DOCUMENT_NODE:
11753 break;
11754 default:
11755 return(XML_ERR_INTERNAL_ERROR);
11756
11757 }
11758 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11759 (node->type != XML_DOCUMENT_NODE) &&
11760 (node->type != XML_HTML_DOCUMENT_NODE))
11761 node = node->parent;
11762 if (node == NULL)
11763 return(XML_ERR_INTERNAL_ERROR);
11764 if (node->type == XML_ELEMENT_NODE)
11765 doc = node->doc;
11766 else
11767 doc = (xmlDocPtr) node;
11768 if (doc == NULL)
11769 return(XML_ERR_INTERNAL_ERROR);
11770
11771 /*
11772 * allocate a context and set-up everything not related to the
11773 * node position in the tree
11774 */
11775 if (doc->type == XML_DOCUMENT_NODE)
11776 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11777#ifdef LIBXML_HTML_ENABLED
11778 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11779 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11780#endif
11781 else
11782 return(XML_ERR_INTERNAL_ERROR);
11783
11784 if (ctxt == NULL)
11785 return(XML_ERR_NO_MEMORY);
11786 fake = xmlNewComment(NULL);
11787 if (fake == NULL) {
11788 xmlFreeParserCtxt(ctxt);
11789 return(XML_ERR_NO_MEMORY);
11790 }
11791 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011792
11793 /*
11794 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11795 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11796 * we must wait until the last moment to free the original one.
11797 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011798 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011799 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011800 xmlDictFree(ctxt->dict);
11801 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011802 } else
11803 options |= XML_PARSE_NODICT;
11804
11805 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011806 xmlDetectSAX2(ctxt);
11807 ctxt->myDoc = doc;
11808
11809 if (node->type == XML_ELEMENT_NODE) {
11810 nodePush(ctxt, node);
11811 /*
11812 * initialize the SAX2 namespaces stack
11813 */
11814 cur = node;
11815 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11816 xmlNsPtr ns = cur->nsDef;
11817 const xmlChar *iprefix, *ihref;
11818
11819 while (ns != NULL) {
11820 if (ctxt->dict) {
11821 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11822 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11823 } else {
11824 iprefix = ns->prefix;
11825 ihref = ns->href;
11826 }
11827
11828 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11829 nsPush(ctxt, iprefix, ihref);
11830 nsnr++;
11831 }
11832 ns = ns->next;
11833 }
11834 cur = cur->parent;
11835 }
11836 ctxt->instate = XML_PARSER_CONTENT;
11837 }
11838
11839 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11840 /*
11841 * ID/IDREF registration will be done in xmlValidateElement below
11842 */
11843 ctxt->loadsubset |= XML_SKIP_IDS;
11844 }
11845
Daniel Veillard499cc922006-01-18 17:22:35 +000011846#ifdef LIBXML_HTML_ENABLED
11847 if (doc->type == XML_HTML_DOCUMENT_NODE)
11848 __htmlParseContent(ctxt);
11849 else
11850#endif
11851 xmlParseContent(ctxt);
11852
Daniel Veillard29b17482004-08-16 00:39:03 +000011853 nsPop(ctxt, nsnr);
11854 if ((RAW == '<') && (NXT(1) == '/')) {
11855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11856 } else if (RAW != 0) {
11857 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11858 }
11859 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11860 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11861 ctxt->wellFormed = 0;
11862 }
11863
11864 if (!ctxt->wellFormed) {
11865 if (ctxt->errNo == 0)
11866 ret = XML_ERR_INTERNAL_ERROR;
11867 else
11868 ret = (xmlParserErrors)ctxt->errNo;
11869 } else {
11870 ret = XML_ERR_OK;
11871 }
11872
11873 /*
11874 * Return the newly created nodeset after unlinking it from
11875 * the pseudo sibling.
11876 */
11877
11878 cur = fake->next;
11879 fake->next = NULL;
11880 node->last = fake;
11881
11882 if (cur != NULL) {
11883 cur->prev = NULL;
11884 }
11885
11886 *lst = cur;
11887
11888 while (cur != NULL) {
11889 cur->parent = NULL;
11890 cur = cur->next;
11891 }
11892
11893 xmlUnlinkNode(fake);
11894 xmlFreeNode(fake);
11895
11896
11897 if (ret != XML_ERR_OK) {
11898 xmlFreeNodeList(*lst);
11899 *lst = NULL;
11900 }
William M. Brackc3f81342004-10-03 01:22:44 +000011901
William M. Brackb7b54de2004-10-06 16:38:01 +000011902 if (doc->dict != NULL)
11903 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011904 xmlFreeParserCtxt(ctxt);
11905
11906 return(ret);
11907#else /* !SAX2 */
11908 return(XML_ERR_INTERNAL_ERROR);
11909#endif
11910}
11911
Daniel Veillard81273902003-09-30 00:43:48 +000011912#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011913/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011914 * xmlParseBalancedChunkMemoryRecover:
11915 * @doc: the document the chunk pertains to
11916 * @sax: the SAX handler bloc (possibly NULL)
11917 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11918 * @depth: Used for loop detection, use 0
11919 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11920 * @lst: the return value for the set of parsed nodes
11921 * @recover: return nodes even if the data is broken (use 0)
11922 *
11923 *
11924 * Parse a well-balanced chunk of an XML document
11925 * called by the parser
11926 * The allowed sequence for the Well Balanced Chunk is the one defined by
11927 * the content production in the XML grammar:
11928 *
11929 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11930 *
11931 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11932 * the parser error code otherwise
11933 *
11934 * In case recover is set to 1, the nodelist will not be empty even if
11935 * the parsed chunk is not well balanced.
11936 */
11937int
11938xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11939 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11940 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011941 xmlParserCtxtPtr ctxt;
11942 xmlDocPtr newDoc;
11943 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011944 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011945 int size;
11946 int ret = 0;
11947
11948 if (depth > 40) {
11949 return(XML_ERR_ENTITY_LOOP);
11950 }
11951
11952
Daniel Veillardcda96922001-08-21 10:56:31 +000011953 if (lst != NULL)
11954 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011955 if (string == NULL)
11956 return(-1);
11957
11958 size = xmlStrlen(string);
11959
11960 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11961 if (ctxt == NULL) return(-1);
11962 ctxt->userData = ctxt;
11963 if (sax != NULL) {
11964 oldsax = ctxt->sax;
11965 ctxt->sax = sax;
11966 if (user_data != NULL)
11967 ctxt->userData = user_data;
11968 }
11969 newDoc = xmlNewDoc(BAD_CAST "1.0");
11970 if (newDoc == NULL) {
11971 xmlFreeParserCtxt(ctxt);
11972 return(-1);
11973 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011974 if ((doc != NULL) && (doc->dict != NULL)) {
11975 xmlDictFree(ctxt->dict);
11976 ctxt->dict = doc->dict;
11977 xmlDictReference(ctxt->dict);
11978 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11979 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11980 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11981 ctxt->dictNames = 1;
11982 } else {
11983 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11984 }
Owen Taylor3473f882001-02-23 17:55:21 +000011985 if (doc != NULL) {
11986 newDoc->intSubset = doc->intSubset;
11987 newDoc->extSubset = doc->extSubset;
11988 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011989 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11990 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011991 if (sax != NULL)
11992 ctxt->sax = oldsax;
11993 xmlFreeParserCtxt(ctxt);
11994 newDoc->intSubset = NULL;
11995 newDoc->extSubset = NULL;
11996 xmlFreeDoc(newDoc);
11997 return(-1);
11998 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011999 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12000 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012001 if (doc == NULL) {
12002 ctxt->myDoc = newDoc;
12003 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012004 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012005 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012006 /* Ensure that doc has XML spec namespace */
12007 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12008 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012009 }
12010 ctxt->instate = XML_PARSER_CONTENT;
12011 ctxt->depth = depth;
12012
12013 /*
12014 * Doing validity checking on chunk doesn't make sense
12015 */
12016 ctxt->validate = 0;
12017 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012018 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012019
Daniel Veillardb39bc392002-10-26 19:29:51 +000012020 if ( doc != NULL ){
12021 content = doc->children;
12022 doc->children = NULL;
12023 xmlParseContent(ctxt);
12024 doc->children = content;
12025 }
12026 else {
12027 xmlParseContent(ctxt);
12028 }
Owen Taylor3473f882001-02-23 17:55:21 +000012029 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012030 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012031 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012032 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012033 }
12034 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012035 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012036 }
12037
12038 if (!ctxt->wellFormed) {
12039 if (ctxt->errNo == 0)
12040 ret = 1;
12041 else
12042 ret = ctxt->errNo;
12043 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012044 ret = 0;
12045 }
12046
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012047 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12048 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012049
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012050 /*
12051 * Return the newly created nodeset after unlinking it from
12052 * they pseudo parent.
12053 */
12054 cur = newDoc->children->children;
12055 *lst = cur;
12056 while (cur != NULL) {
12057 xmlSetTreeDoc(cur, doc);
12058 cur->parent = NULL;
12059 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012060 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012061 newDoc->children->children = NULL;
12062 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012063
Owen Taylor3473f882001-02-23 17:55:21 +000012064 if (sax != NULL)
12065 ctxt->sax = oldsax;
12066 xmlFreeParserCtxt(ctxt);
12067 newDoc->intSubset = NULL;
12068 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012069 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012070 xmlFreeDoc(newDoc);
12071
12072 return(ret);
12073}
12074
12075/**
12076 * xmlSAXParseEntity:
12077 * @sax: the SAX handler block
12078 * @filename: the filename
12079 *
12080 * parse an XML external entity out of context and build a tree.
12081 * It use the given SAX function block to handle the parsing callback.
12082 * If sax is NULL, fallback to the default DOM tree building routines.
12083 *
12084 * [78] extParsedEnt ::= TextDecl? content
12085 *
12086 * This correspond to a "Well Balanced" chunk
12087 *
12088 * Returns the resulting document tree
12089 */
12090
12091xmlDocPtr
12092xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12093 xmlDocPtr ret;
12094 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012095
12096 ctxt = xmlCreateFileParserCtxt(filename);
12097 if (ctxt == NULL) {
12098 return(NULL);
12099 }
12100 if (sax != NULL) {
12101 if (ctxt->sax != NULL)
12102 xmlFree(ctxt->sax);
12103 ctxt->sax = sax;
12104 ctxt->userData = NULL;
12105 }
12106
Owen Taylor3473f882001-02-23 17:55:21 +000012107 xmlParseExtParsedEnt(ctxt);
12108
12109 if (ctxt->wellFormed)
12110 ret = ctxt->myDoc;
12111 else {
12112 ret = NULL;
12113 xmlFreeDoc(ctxt->myDoc);
12114 ctxt->myDoc = NULL;
12115 }
12116 if (sax != NULL)
12117 ctxt->sax = NULL;
12118 xmlFreeParserCtxt(ctxt);
12119
12120 return(ret);
12121}
12122
12123/**
12124 * xmlParseEntity:
12125 * @filename: the filename
12126 *
12127 * parse an XML external entity out of context and build a tree.
12128 *
12129 * [78] extParsedEnt ::= TextDecl? content
12130 *
12131 * This correspond to a "Well Balanced" chunk
12132 *
12133 * Returns the resulting document tree
12134 */
12135
12136xmlDocPtr
12137xmlParseEntity(const char *filename) {
12138 return(xmlSAXParseEntity(NULL, filename));
12139}
Daniel Veillard81273902003-09-30 00:43:48 +000012140#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012141
12142/**
12143 * xmlCreateEntityParserCtxt:
12144 * @URL: the entity URL
12145 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012146 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012147 *
12148 * Create a parser context for an external entity
12149 * Automatic support for ZLIB/Compress compressed document is provided
12150 * by default if found at compile-time.
12151 *
12152 * Returns the new parser context or NULL
12153 */
12154xmlParserCtxtPtr
12155xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12156 const xmlChar *base) {
12157 xmlParserCtxtPtr ctxt;
12158 xmlParserInputPtr inputStream;
12159 char *directory = NULL;
12160 xmlChar *uri;
12161
12162 ctxt = xmlNewParserCtxt();
12163 if (ctxt == NULL) {
12164 return(NULL);
12165 }
12166
12167 uri = xmlBuildURI(URL, base);
12168
12169 if (uri == NULL) {
12170 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12171 if (inputStream == NULL) {
12172 xmlFreeParserCtxt(ctxt);
12173 return(NULL);
12174 }
12175
12176 inputPush(ctxt, inputStream);
12177
12178 if ((ctxt->directory == NULL) && (directory == NULL))
12179 directory = xmlParserGetDirectory((char *)URL);
12180 if ((ctxt->directory == NULL) && (directory != NULL))
12181 ctxt->directory = directory;
12182 } else {
12183 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12184 if (inputStream == NULL) {
12185 xmlFree(uri);
12186 xmlFreeParserCtxt(ctxt);
12187 return(NULL);
12188 }
12189
12190 inputPush(ctxt, inputStream);
12191
12192 if ((ctxt->directory == NULL) && (directory == NULL))
12193 directory = xmlParserGetDirectory((char *)uri);
12194 if ((ctxt->directory == NULL) && (directory != NULL))
12195 ctxt->directory = directory;
12196 xmlFree(uri);
12197 }
Owen Taylor3473f882001-02-23 17:55:21 +000012198 return(ctxt);
12199}
12200
12201/************************************************************************
12202 * *
12203 * Front ends when parsing from a file *
12204 * *
12205 ************************************************************************/
12206
12207/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012208 * xmlCreateURLParserCtxt:
12209 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012210 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012211 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012212 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012213 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012214 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012215 *
12216 * Returns the new parser context or NULL
12217 */
12218xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012219xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012220{
12221 xmlParserCtxtPtr ctxt;
12222 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012223 char *directory = NULL;
12224
Owen Taylor3473f882001-02-23 17:55:21 +000012225 ctxt = xmlNewParserCtxt();
12226 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012227 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012228 return(NULL);
12229 }
12230
Daniel Veillarddf292f72005-01-16 19:00:15 +000012231 if (options)
12232 xmlCtxtUseOptions(ctxt, options);
12233 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012234
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012235 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012236 if (inputStream == NULL) {
12237 xmlFreeParserCtxt(ctxt);
12238 return(NULL);
12239 }
12240
Owen Taylor3473f882001-02-23 17:55:21 +000012241 inputPush(ctxt, inputStream);
12242 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012243 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012244 if ((ctxt->directory == NULL) && (directory != NULL))
12245 ctxt->directory = directory;
12246
12247 return(ctxt);
12248}
12249
Daniel Veillard61b93382003-11-03 14:28:31 +000012250/**
12251 * xmlCreateFileParserCtxt:
12252 * @filename: the filename
12253 *
12254 * Create a parser context for a file content.
12255 * Automatic support for ZLIB/Compress compressed document is provided
12256 * by default if found at compile-time.
12257 *
12258 * Returns the new parser context or NULL
12259 */
12260xmlParserCtxtPtr
12261xmlCreateFileParserCtxt(const char *filename)
12262{
12263 return(xmlCreateURLParserCtxt(filename, 0));
12264}
12265
Daniel Veillard81273902003-09-30 00:43:48 +000012266#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012267/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012268 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012269 * @sax: the SAX handler block
12270 * @filename: the filename
12271 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12272 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012273 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012274 *
12275 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12276 * compressed document is provided by default if found at compile-time.
12277 * It use the given SAX function block to handle the parsing callback.
12278 * If sax is NULL, fallback to the default DOM tree building routines.
12279 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012280 * User data (void *) is stored within the parser context in the
12281 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012282 *
Owen Taylor3473f882001-02-23 17:55:21 +000012283 * Returns the resulting document tree
12284 */
12285
12286xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012287xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12288 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012289 xmlDocPtr ret;
12290 xmlParserCtxtPtr ctxt;
12291 char *directory = NULL;
12292
Daniel Veillard635ef722001-10-29 11:48:19 +000012293 xmlInitParser();
12294
Owen Taylor3473f882001-02-23 17:55:21 +000012295 ctxt = xmlCreateFileParserCtxt(filename);
12296 if (ctxt == NULL) {
12297 return(NULL);
12298 }
12299 if (sax != NULL) {
12300 if (ctxt->sax != NULL)
12301 xmlFree(ctxt->sax);
12302 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012303 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012304 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012305 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012306 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012307 }
Owen Taylor3473f882001-02-23 17:55:21 +000012308
12309 if ((ctxt->directory == NULL) && (directory == NULL))
12310 directory = xmlParserGetDirectory(filename);
12311 if ((ctxt->directory == NULL) && (directory != NULL))
12312 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12313
Daniel Veillarddad3f682002-11-17 16:47:27 +000012314 ctxt->recovery = recovery;
12315
Owen Taylor3473f882001-02-23 17:55:21 +000012316 xmlParseDocument(ctxt);
12317
William M. Brackc07329e2003-09-08 01:57:30 +000012318 if ((ctxt->wellFormed) || recovery) {
12319 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012320 if (ret != NULL) {
12321 if (ctxt->input->buf->compressed > 0)
12322 ret->compression = 9;
12323 else
12324 ret->compression = ctxt->input->buf->compressed;
12325 }
William M. Brackc07329e2003-09-08 01:57:30 +000012326 }
Owen Taylor3473f882001-02-23 17:55:21 +000012327 else {
12328 ret = NULL;
12329 xmlFreeDoc(ctxt->myDoc);
12330 ctxt->myDoc = NULL;
12331 }
12332 if (sax != NULL)
12333 ctxt->sax = NULL;
12334 xmlFreeParserCtxt(ctxt);
12335
12336 return(ret);
12337}
12338
12339/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012340 * xmlSAXParseFile:
12341 * @sax: the SAX handler block
12342 * @filename: the filename
12343 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12344 * documents
12345 *
12346 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12347 * compressed document is provided by default if found at compile-time.
12348 * It use the given SAX function block to handle the parsing callback.
12349 * If sax is NULL, fallback to the default DOM tree building routines.
12350 *
12351 * Returns the resulting document tree
12352 */
12353
12354xmlDocPtr
12355xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12356 int recovery) {
12357 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12358}
12359
12360/**
Owen Taylor3473f882001-02-23 17:55:21 +000012361 * xmlRecoverDoc:
12362 * @cur: a pointer to an array of xmlChar
12363 *
12364 * parse an XML in-memory document and build a tree.
12365 * In the case the document is not Well Formed, a tree is built anyway
12366 *
12367 * Returns the resulting document tree
12368 */
12369
12370xmlDocPtr
12371xmlRecoverDoc(xmlChar *cur) {
12372 return(xmlSAXParseDoc(NULL, cur, 1));
12373}
12374
12375/**
12376 * xmlParseFile:
12377 * @filename: the filename
12378 *
12379 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12380 * compressed document is provided by default if found at compile-time.
12381 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012382 * Returns the resulting document tree if the file was wellformed,
12383 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012384 */
12385
12386xmlDocPtr
12387xmlParseFile(const char *filename) {
12388 return(xmlSAXParseFile(NULL, filename, 0));
12389}
12390
12391/**
12392 * xmlRecoverFile:
12393 * @filename: the filename
12394 *
12395 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12396 * compressed document is provided by default if found at compile-time.
12397 * In the case the document is not Well Formed, a tree is built anyway
12398 *
12399 * Returns the resulting document tree
12400 */
12401
12402xmlDocPtr
12403xmlRecoverFile(const char *filename) {
12404 return(xmlSAXParseFile(NULL, filename, 1));
12405}
12406
12407
12408/**
12409 * xmlSetupParserForBuffer:
12410 * @ctxt: an XML parser context
12411 * @buffer: a xmlChar * buffer
12412 * @filename: a file name
12413 *
12414 * Setup the parser context to parse a new buffer; Clears any prior
12415 * contents from the parser context. The buffer parameter must not be
12416 * NULL, but the filename parameter can be
12417 */
12418void
12419xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12420 const char* filename)
12421{
12422 xmlParserInputPtr input;
12423
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012424 if ((ctxt == NULL) || (buffer == NULL))
12425 return;
12426
Owen Taylor3473f882001-02-23 17:55:21 +000012427 input = xmlNewInputStream(ctxt);
12428 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012429 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012430 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012431 return;
12432 }
12433
12434 xmlClearParserCtxt(ctxt);
12435 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012436 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012437 input->base = buffer;
12438 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012439 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012440 inputPush(ctxt, input);
12441}
12442
12443/**
12444 * xmlSAXUserParseFile:
12445 * @sax: a SAX handler
12446 * @user_data: The user data returned on SAX callbacks
12447 * @filename: a file name
12448 *
12449 * parse an XML file and call the given SAX handler routines.
12450 * Automatic support for ZLIB/Compress compressed document is provided
12451 *
12452 * Returns 0 in case of success or a error number otherwise
12453 */
12454int
12455xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12456 const char *filename) {
12457 int ret = 0;
12458 xmlParserCtxtPtr ctxt;
12459
12460 ctxt = xmlCreateFileParserCtxt(filename);
12461 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012462#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012463 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012464#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012465 xmlFree(ctxt->sax);
12466 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012467 xmlDetectSAX2(ctxt);
12468
Owen Taylor3473f882001-02-23 17:55:21 +000012469 if (user_data != NULL)
12470 ctxt->userData = user_data;
12471
12472 xmlParseDocument(ctxt);
12473
12474 if (ctxt->wellFormed)
12475 ret = 0;
12476 else {
12477 if (ctxt->errNo != 0)
12478 ret = ctxt->errNo;
12479 else
12480 ret = -1;
12481 }
12482 if (sax != NULL)
12483 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012484 if (ctxt->myDoc != NULL) {
12485 xmlFreeDoc(ctxt->myDoc);
12486 ctxt->myDoc = NULL;
12487 }
Owen Taylor3473f882001-02-23 17:55:21 +000012488 xmlFreeParserCtxt(ctxt);
12489
12490 return ret;
12491}
Daniel Veillard81273902003-09-30 00:43:48 +000012492#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012493
12494/************************************************************************
12495 * *
12496 * Front ends when parsing from memory *
12497 * *
12498 ************************************************************************/
12499
12500/**
12501 * xmlCreateMemoryParserCtxt:
12502 * @buffer: a pointer to a char array
12503 * @size: the size of the array
12504 *
12505 * Create a parser context for an XML in-memory document.
12506 *
12507 * Returns the new parser context or NULL
12508 */
12509xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012510xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012511 xmlParserCtxtPtr ctxt;
12512 xmlParserInputPtr input;
12513 xmlParserInputBufferPtr buf;
12514
12515 if (buffer == NULL)
12516 return(NULL);
12517 if (size <= 0)
12518 return(NULL);
12519
12520 ctxt = xmlNewParserCtxt();
12521 if (ctxt == NULL)
12522 return(NULL);
12523
Daniel Veillard53350552003-09-18 13:35:51 +000012524 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012525 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012526 if (buf == NULL) {
12527 xmlFreeParserCtxt(ctxt);
12528 return(NULL);
12529 }
Owen Taylor3473f882001-02-23 17:55:21 +000012530
12531 input = xmlNewInputStream(ctxt);
12532 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012533 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012534 xmlFreeParserCtxt(ctxt);
12535 return(NULL);
12536 }
12537
12538 input->filename = NULL;
12539 input->buf = buf;
12540 input->base = input->buf->buffer->content;
12541 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012542 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012543
12544 inputPush(ctxt, input);
12545 return(ctxt);
12546}
12547
Daniel Veillard81273902003-09-30 00:43:48 +000012548#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012549/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012550 * xmlSAXParseMemoryWithData:
12551 * @sax: the SAX handler block
12552 * @buffer: an pointer to a char array
12553 * @size: the size of the array
12554 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12555 * documents
12556 * @data: the userdata
12557 *
12558 * parse an XML in-memory block and use the given SAX function block
12559 * to handle the parsing callback. If sax is NULL, fallback to the default
12560 * DOM tree building routines.
12561 *
12562 * User data (void *) is stored within the parser context in the
12563 * context's _private member, so it is available nearly everywhere in libxml
12564 *
12565 * Returns the resulting document tree
12566 */
12567
12568xmlDocPtr
12569xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12570 int size, int recovery, void *data) {
12571 xmlDocPtr ret;
12572 xmlParserCtxtPtr ctxt;
12573
12574 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12575 if (ctxt == NULL) return(NULL);
12576 if (sax != NULL) {
12577 if (ctxt->sax != NULL)
12578 xmlFree(ctxt->sax);
12579 ctxt->sax = sax;
12580 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012581 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012582 if (data!=NULL) {
12583 ctxt->_private=data;
12584 }
12585
Daniel Veillardadba5f12003-04-04 16:09:01 +000012586 ctxt->recovery = recovery;
12587
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012588 xmlParseDocument(ctxt);
12589
12590 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12591 else {
12592 ret = NULL;
12593 xmlFreeDoc(ctxt->myDoc);
12594 ctxt->myDoc = NULL;
12595 }
12596 if (sax != NULL)
12597 ctxt->sax = NULL;
12598 xmlFreeParserCtxt(ctxt);
12599
12600 return(ret);
12601}
12602
12603/**
Owen Taylor3473f882001-02-23 17:55:21 +000012604 * xmlSAXParseMemory:
12605 * @sax: the SAX handler block
12606 * @buffer: an pointer to a char array
12607 * @size: the size of the array
12608 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12609 * documents
12610 *
12611 * parse an XML in-memory block and use the given SAX function block
12612 * to handle the parsing callback. If sax is NULL, fallback to the default
12613 * DOM tree building routines.
12614 *
12615 * Returns the resulting document tree
12616 */
12617xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012618xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12619 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012620 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012621}
12622
12623/**
12624 * xmlParseMemory:
12625 * @buffer: an pointer to a char array
12626 * @size: the size of the array
12627 *
12628 * parse an XML in-memory block and build a tree.
12629 *
12630 * Returns the resulting document tree
12631 */
12632
Daniel Veillard50822cb2001-07-26 20:05:51 +000012633xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012634 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12635}
12636
12637/**
12638 * xmlRecoverMemory:
12639 * @buffer: an pointer to a char array
12640 * @size: the size of the array
12641 *
12642 * parse an XML in-memory block and build a tree.
12643 * In the case the document is not Well Formed, a tree is built anyway
12644 *
12645 * Returns the resulting document tree
12646 */
12647
Daniel Veillard50822cb2001-07-26 20:05:51 +000012648xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012649 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12650}
12651
12652/**
12653 * xmlSAXUserParseMemory:
12654 * @sax: a SAX handler
12655 * @user_data: The user data returned on SAX callbacks
12656 * @buffer: an in-memory XML document input
12657 * @size: the length of the XML document in bytes
12658 *
12659 * A better SAX parsing routine.
12660 * parse an XML in-memory buffer and call the given SAX handler routines.
12661 *
12662 * Returns 0 in case of success or a error number otherwise
12663 */
12664int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012665 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012666 int ret = 0;
12667 xmlParserCtxtPtr ctxt;
12668 xmlSAXHandlerPtr oldsax = NULL;
12669
Daniel Veillard9e923512002-08-14 08:48:52 +000012670 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012671 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12672 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012673 oldsax = ctxt->sax;
12674 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012675 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012676 if (user_data != NULL)
12677 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012678
12679 xmlParseDocument(ctxt);
12680
12681 if (ctxt->wellFormed)
12682 ret = 0;
12683 else {
12684 if (ctxt->errNo != 0)
12685 ret = ctxt->errNo;
12686 else
12687 ret = -1;
12688 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012689 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012690 if (ctxt->myDoc != NULL) {
12691 xmlFreeDoc(ctxt->myDoc);
12692 ctxt->myDoc = NULL;
12693 }
Owen Taylor3473f882001-02-23 17:55:21 +000012694 xmlFreeParserCtxt(ctxt);
12695
12696 return ret;
12697}
Daniel Veillard81273902003-09-30 00:43:48 +000012698#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012699
12700/**
12701 * xmlCreateDocParserCtxt:
12702 * @cur: a pointer to an array of xmlChar
12703 *
12704 * Creates a parser context for an XML in-memory document.
12705 *
12706 * Returns the new parser context or NULL
12707 */
12708xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012709xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012710 int len;
12711
12712 if (cur == NULL)
12713 return(NULL);
12714 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012715 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012716}
12717
Daniel Veillard81273902003-09-30 00:43:48 +000012718#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012719/**
12720 * xmlSAXParseDoc:
12721 * @sax: the SAX handler block
12722 * @cur: a pointer to an array of xmlChar
12723 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12724 * documents
12725 *
12726 * parse an XML in-memory document and build a tree.
12727 * It use the given SAX function block to handle the parsing callback.
12728 * If sax is NULL, fallback to the default DOM tree building routines.
12729 *
12730 * Returns the resulting document tree
12731 */
12732
12733xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012734xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012735 xmlDocPtr ret;
12736 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012737 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012738
Daniel Veillard38936062004-11-04 17:45:11 +000012739 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012740
12741
12742 ctxt = xmlCreateDocParserCtxt(cur);
12743 if (ctxt == NULL) return(NULL);
12744 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012745 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012746 ctxt->sax = sax;
12747 ctxt->userData = NULL;
12748 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012749 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012750
12751 xmlParseDocument(ctxt);
12752 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12753 else {
12754 ret = NULL;
12755 xmlFreeDoc(ctxt->myDoc);
12756 ctxt->myDoc = NULL;
12757 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012758 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012759 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012760 xmlFreeParserCtxt(ctxt);
12761
12762 return(ret);
12763}
12764
12765/**
12766 * xmlParseDoc:
12767 * @cur: a pointer to an array of xmlChar
12768 *
12769 * parse an XML in-memory document and build a tree.
12770 *
12771 * Returns the resulting document tree
12772 */
12773
12774xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012775xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012776 return(xmlSAXParseDoc(NULL, cur, 0));
12777}
Daniel Veillard81273902003-09-30 00:43:48 +000012778#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012779
Daniel Veillard81273902003-09-30 00:43:48 +000012780#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012781/************************************************************************
12782 * *
12783 * Specific function to keep track of entities references *
12784 * and used by the XSLT debugger *
12785 * *
12786 ************************************************************************/
12787
12788static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12789
12790/**
12791 * xmlAddEntityReference:
12792 * @ent : A valid entity
12793 * @firstNode : A valid first node for children of entity
12794 * @lastNode : A valid last node of children entity
12795 *
12796 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12797 */
12798static void
12799xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12800 xmlNodePtr lastNode)
12801{
12802 if (xmlEntityRefFunc != NULL) {
12803 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12804 }
12805}
12806
12807
12808/**
12809 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012810 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012811 *
12812 * Set the function to call call back when a xml reference has been made
12813 */
12814void
12815xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12816{
12817 xmlEntityRefFunc = func;
12818}
Daniel Veillard81273902003-09-30 00:43:48 +000012819#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012820
12821/************************************************************************
12822 * *
12823 * Miscellaneous *
12824 * *
12825 ************************************************************************/
12826
12827#ifdef LIBXML_XPATH_ENABLED
12828#include <libxml/xpath.h>
12829#endif
12830
Daniel Veillardffa3c742005-07-21 13:24:09 +000012831extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012832static int xmlParserInitialized = 0;
12833
12834/**
12835 * xmlInitParser:
12836 *
12837 * Initialization function for the XML parser.
12838 * This is not reentrant. Call once before processing in case of
12839 * use in multithreaded programs.
12840 */
12841
12842void
12843xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012844 if (xmlParserInitialized != 0)
12845 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012846
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012847 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12848 (xmlGenericError == NULL))
12849 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012850 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012851 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012852 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012853 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012854 xmlDefaultSAXHandlerInit();
12855 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012856#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012857 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012858#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012859#ifdef LIBXML_HTML_ENABLED
12860 htmlInitAutoClose();
12861 htmlDefaultSAXHandlerInit();
12862#endif
12863#ifdef LIBXML_XPATH_ENABLED
12864 xmlXPathInit();
12865#endif
12866 xmlParserInitialized = 1;
12867}
12868
12869/**
12870 * xmlCleanupParser:
12871 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012872 * Cleanup function for the XML library. It tries to reclaim all
12873 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012874 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012875 * function should not prevent reusing the library but one should
12876 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012877 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012878 */
12879
12880void
12881xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012882 if (!xmlParserInitialized)
12883 return;
12884
Owen Taylor3473f882001-02-23 17:55:21 +000012885 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012886#ifdef LIBXML_CATALOG_ENABLED
12887 xmlCatalogCleanup();
12888#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012889 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012890 xmlCleanupInputCallbacks();
12891#ifdef LIBXML_OUTPUT_ENABLED
12892 xmlCleanupOutputCallbacks();
12893#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012894#ifdef LIBXML_SCHEMAS_ENABLED
12895 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012896 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012897#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012898 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012899 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012900 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012901 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012902 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012903}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012904
12905/************************************************************************
12906 * *
12907 * New set (2.6.0) of simpler and more flexible APIs *
12908 * *
12909 ************************************************************************/
12910
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it needs a terminating semicolon at the call
 * site and is safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
12922
12923/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012924 * xmlCtxtReset:
12925 * @ctxt: an XML parser context
12926 *
12927 * Reset a parser context
12928 */
12929void
12930xmlCtxtReset(xmlParserCtxtPtr ctxt)
12931{
12932 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012933 xmlDictPtr dict;
12934
12935 if (ctxt == NULL)
12936 return;
12937
12938 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012939
12940 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12941 xmlFreeInputStream(input);
12942 }
12943 ctxt->inputNr = 0;
12944 ctxt->input = NULL;
12945
12946 ctxt->spaceNr = 0;
12947 ctxt->spaceTab[0] = -1;
12948 ctxt->space = &ctxt->spaceTab[0];
12949
12950
12951 ctxt->nodeNr = 0;
12952 ctxt->node = NULL;
12953
12954 ctxt->nameNr = 0;
12955 ctxt->name = NULL;
12956
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012957 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012958 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012959 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012960 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012961 DICT_FREE(ctxt->directory);
12962 ctxt->directory = NULL;
12963 DICT_FREE(ctxt->extSubURI);
12964 ctxt->extSubURI = NULL;
12965 DICT_FREE(ctxt->extSubSystem);
12966 ctxt->extSubSystem = NULL;
12967 if (ctxt->myDoc != NULL)
12968 xmlFreeDoc(ctxt->myDoc);
12969 ctxt->myDoc = NULL;
12970
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012971 ctxt->standalone = -1;
12972 ctxt->hasExternalSubset = 0;
12973 ctxt->hasPErefs = 0;
12974 ctxt->html = 0;
12975 ctxt->external = 0;
12976 ctxt->instate = XML_PARSER_START;
12977 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012978
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012979 ctxt->wellFormed = 1;
12980 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012981 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012982 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012983#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012984 ctxt->vctxt.userData = ctxt;
12985 ctxt->vctxt.error = xmlParserValidityError;
12986 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012987#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012988 ctxt->record_info = 0;
12989 ctxt->nbChars = 0;
12990 ctxt->checkIndex = 0;
12991 ctxt->inSubset = 0;
12992 ctxt->errNo = XML_ERR_OK;
12993 ctxt->depth = 0;
12994 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12995 ctxt->catalogs = NULL;
12996 xmlInitNodeInfoSeq(&ctxt->node_seq);
12997
12998 if (ctxt->attsDefault != NULL) {
12999 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13000 ctxt->attsDefault = NULL;
13001 }
13002 if (ctxt->attsSpecial != NULL) {
13003 xmlHashFree(ctxt->attsSpecial, NULL);
13004 ctxt->attsSpecial = NULL;
13005 }
13006
Daniel Veillard4432df22003-09-28 18:58:27 +000013007#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008 if (ctxt->catalogs != NULL)
13009 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013010#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013011 if (ctxt->lastError.code != XML_ERR_OK)
13012 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013013}
13014
13015/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013016 * xmlCtxtResetPush:
13017 * @ctxt: an XML parser context
13018 * @chunk: a pointer to an array of chars
13019 * @size: number of chars in the array
13020 * @filename: an optional file name or URI
13021 * @encoding: the document encoding, or NULL
13022 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013023 * Reset a push parser context
13024 *
13025 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013026 */
13027int
13028xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13029 int size, const char *filename, const char *encoding)
13030{
13031 xmlParserInputPtr inputStream;
13032 xmlParserInputBufferPtr buf;
13033 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13034
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013035 if (ctxt == NULL)
13036 return(1);
13037
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013038 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13039 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13040
13041 buf = xmlAllocParserInputBuffer(enc);
13042 if (buf == NULL)
13043 return(1);
13044
13045 if (ctxt == NULL) {
13046 xmlFreeParserInputBuffer(buf);
13047 return(1);
13048 }
13049
13050 xmlCtxtReset(ctxt);
13051
13052 if (ctxt->pushTab == NULL) {
13053 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13054 sizeof(xmlChar *));
13055 if (ctxt->pushTab == NULL) {
13056 xmlErrMemory(ctxt, NULL);
13057 xmlFreeParserInputBuffer(buf);
13058 return(1);
13059 }
13060 }
13061
13062 if (filename == NULL) {
13063 ctxt->directory = NULL;
13064 } else {
13065 ctxt->directory = xmlParserGetDirectory(filename);
13066 }
13067
13068 inputStream = xmlNewInputStream(ctxt);
13069 if (inputStream == NULL) {
13070 xmlFreeParserInputBuffer(buf);
13071 return(1);
13072 }
13073
13074 if (filename == NULL)
13075 inputStream->filename = NULL;
13076 else
13077 inputStream->filename = (char *)
13078 xmlCanonicPath((const xmlChar *) filename);
13079 inputStream->buf = buf;
13080 inputStream->base = inputStream->buf->buffer->content;
13081 inputStream->cur = inputStream->buf->buffer->content;
13082 inputStream->end =
13083 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13084
13085 inputPush(ctxt, inputStream);
13086
13087 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13088 (ctxt->input->buf != NULL)) {
13089 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13090 int cur = ctxt->input->cur - ctxt->input->base;
13091
13092 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13093
13094 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13095 ctxt->input->cur = ctxt->input->base + cur;
13096 ctxt->input->end =
13097 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13098 use];
13099#ifdef DEBUG_PUSH
13100 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13101#endif
13102 }
13103
13104 if (encoding != NULL) {
13105 xmlCharEncodingHandlerPtr hdlr;
13106
13107 hdlr = xmlFindCharEncodingHandler(encoding);
13108 if (hdlr != NULL) {
13109 xmlSwitchToEncoding(ctxt, hdlr);
13110 } else {
13111 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13112 "Unsupported encoding %s\n", BAD_CAST encoding);
13113 }
13114 } else if (enc != XML_CHAR_ENCODING_NONE) {
13115 xmlSwitchEncoding(ctxt, enc);
13116 }
13117
13118 return(0);
13119}
13120
13121/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013122 * xmlCtxtUseOptions:
13123 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013124 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013125 *
13126 * Applies the options to the parser context
13127 *
13128 * Returns 0 in case of success, the set of unknown or unimplemented options
13129 * in case of error.
13130 */
13131int
13132xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13133{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013134 if (ctxt == NULL)
13135 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013136 if (options & XML_PARSE_RECOVER) {
13137 ctxt->recovery = 1;
13138 options -= XML_PARSE_RECOVER;
13139 } else
13140 ctxt->recovery = 0;
13141 if (options & XML_PARSE_DTDLOAD) {
13142 ctxt->loadsubset = XML_DETECT_IDS;
13143 options -= XML_PARSE_DTDLOAD;
13144 } else
13145 ctxt->loadsubset = 0;
13146 if (options & XML_PARSE_DTDATTR) {
13147 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13148 options -= XML_PARSE_DTDATTR;
13149 }
13150 if (options & XML_PARSE_NOENT) {
13151 ctxt->replaceEntities = 1;
13152 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13153 options -= XML_PARSE_NOENT;
13154 } else
13155 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013156 if (options & XML_PARSE_PEDANTIC) {
13157 ctxt->pedantic = 1;
13158 options -= XML_PARSE_PEDANTIC;
13159 } else
13160 ctxt->pedantic = 0;
13161 if (options & XML_PARSE_NOBLANKS) {
13162 ctxt->keepBlanks = 0;
13163 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13164 options -= XML_PARSE_NOBLANKS;
13165 } else
13166 ctxt->keepBlanks = 1;
13167 if (options & XML_PARSE_DTDVALID) {
13168 ctxt->validate = 1;
13169 if (options & XML_PARSE_NOWARNING)
13170 ctxt->vctxt.warning = NULL;
13171 if (options & XML_PARSE_NOERROR)
13172 ctxt->vctxt.error = NULL;
13173 options -= XML_PARSE_DTDVALID;
13174 } else
13175 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013176 if (options & XML_PARSE_NOWARNING) {
13177 ctxt->sax->warning = NULL;
13178 options -= XML_PARSE_NOWARNING;
13179 }
13180 if (options & XML_PARSE_NOERROR) {
13181 ctxt->sax->error = NULL;
13182 ctxt->sax->fatalError = NULL;
13183 options -= XML_PARSE_NOERROR;
13184 }
Daniel Veillard81273902003-09-30 00:43:48 +000013185#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013186 if (options & XML_PARSE_SAX1) {
13187 ctxt->sax->startElement = xmlSAX2StartElement;
13188 ctxt->sax->endElement = xmlSAX2EndElement;
13189 ctxt->sax->startElementNs = NULL;
13190 ctxt->sax->endElementNs = NULL;
13191 ctxt->sax->initialized = 1;
13192 options -= XML_PARSE_SAX1;
13193 }
Daniel Veillard81273902003-09-30 00:43:48 +000013194#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013195 if (options & XML_PARSE_NODICT) {
13196 ctxt->dictNames = 0;
13197 options -= XML_PARSE_NODICT;
13198 } else {
13199 ctxt->dictNames = 1;
13200 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013201 if (options & XML_PARSE_NOCDATA) {
13202 ctxt->sax->cdataBlock = NULL;
13203 options -= XML_PARSE_NOCDATA;
13204 }
13205 if (options & XML_PARSE_NSCLEAN) {
13206 ctxt->options |= XML_PARSE_NSCLEAN;
13207 options -= XML_PARSE_NSCLEAN;
13208 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013209 if (options & XML_PARSE_NONET) {
13210 ctxt->options |= XML_PARSE_NONET;
13211 options -= XML_PARSE_NONET;
13212 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013213 if (options & XML_PARSE_COMPACT) {
13214 ctxt->options |= XML_PARSE_COMPACT;
13215 options -= XML_PARSE_COMPACT;
13216 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013217 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013218 return (options);
13219}
13220
13221/**
13222 * xmlDoRead:
13223 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013224 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013225 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013226 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013227 * @reuse: keep the context for reuse
13228 *
13229 * Common front-end for the xmlRead functions
13230 *
13231 * Returns the resulting document tree or NULL
13232 */
13233static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013234xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13235 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013236{
13237 xmlDocPtr ret;
13238
13239 xmlCtxtUseOptions(ctxt, options);
13240 if (encoding != NULL) {
13241 xmlCharEncodingHandlerPtr hdlr;
13242
13243 hdlr = xmlFindCharEncodingHandler(encoding);
13244 if (hdlr != NULL)
13245 xmlSwitchToEncoding(ctxt, hdlr);
13246 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013247 if ((URL != NULL) && (ctxt->input != NULL) &&
13248 (ctxt->input->filename == NULL))
13249 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013250 xmlParseDocument(ctxt);
13251 if ((ctxt->wellFormed) || ctxt->recovery)
13252 ret = ctxt->myDoc;
13253 else {
13254 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013255 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013256 xmlFreeDoc(ctxt->myDoc);
13257 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013258 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013259 ctxt->myDoc = NULL;
13260 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013261 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013262 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013263
13264 return (ret);
13265}
13266
13267/**
13268 * xmlReadDoc:
13269 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013270 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013271 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013272 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013273 *
13274 * parse an XML in-memory document and build a tree.
13275 *
13276 * Returns the resulting document tree
13277 */
13278xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013279xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280{
13281 xmlParserCtxtPtr ctxt;
13282
13283 if (cur == NULL)
13284 return (NULL);
13285
13286 ctxt = xmlCreateDocParserCtxt(cur);
13287 if (ctxt == NULL)
13288 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013289 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013290}
13291
13292/**
13293 * xmlReadFile:
13294 * @filename: a file or URL
13295 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013296 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013297 *
13298 * parse an XML file from the filesystem or the network.
13299 *
13300 * Returns the resulting document tree
13301 */
13302xmlDocPtr
13303xmlReadFile(const char *filename, const char *encoding, int options)
13304{
13305 xmlParserCtxtPtr ctxt;
13306
Daniel Veillard61b93382003-11-03 14:28:31 +000013307 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013308 if (ctxt == NULL)
13309 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013310 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013311}
13312
13313/**
13314 * xmlReadMemory:
13315 * @buffer: a pointer to a char array
13316 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013317 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013318 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013319 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013320 *
13321 * parse an XML in-memory document and build a tree.
13322 *
13323 * Returns the resulting document tree
13324 */
13325xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013326xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327{
13328 xmlParserCtxtPtr ctxt;
13329
13330 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13331 if (ctxt == NULL)
13332 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013333 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013334}
13335
13336/**
13337 * xmlReadFd:
13338 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013339 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013340 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013341 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013342 *
13343 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013344 * NOTE that the file descriptor will not be closed when the
13345 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013346 *
13347 * Returns the resulting document tree
13348 */
13349xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013350xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013351{
13352 xmlParserCtxtPtr ctxt;
13353 xmlParserInputBufferPtr input;
13354 xmlParserInputPtr stream;
13355
13356 if (fd < 0)
13357 return (NULL);
13358
13359 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13360 if (input == NULL)
13361 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013362 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013363 ctxt = xmlNewParserCtxt();
13364 if (ctxt == NULL) {
13365 xmlFreeParserInputBuffer(input);
13366 return (NULL);
13367 }
13368 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13369 if (stream == NULL) {
13370 xmlFreeParserInputBuffer(input);
13371 xmlFreeParserCtxt(ctxt);
13372 return (NULL);
13373 }
13374 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013375 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013376}
13377
13378/**
13379 * xmlReadIO:
13380 * @ioread: an I/O read function
13381 * @ioclose: an I/O close function
13382 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013383 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013384 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013385 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013386 *
13387 * parse an XML document from I/O functions and source and build a tree.
13388 *
13389 * Returns the resulting document tree
13390 */
13391xmlDocPtr
13392xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013393 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013394{
13395 xmlParserCtxtPtr ctxt;
13396 xmlParserInputBufferPtr input;
13397 xmlParserInputPtr stream;
13398
13399 if (ioread == NULL)
13400 return (NULL);
13401
13402 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13403 XML_CHAR_ENCODING_NONE);
13404 if (input == NULL)
13405 return (NULL);
13406 ctxt = xmlNewParserCtxt();
13407 if (ctxt == NULL) {
13408 xmlFreeParserInputBuffer(input);
13409 return (NULL);
13410 }
13411 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13412 if (stream == NULL) {
13413 xmlFreeParserInputBuffer(input);
13414 xmlFreeParserCtxt(ctxt);
13415 return (NULL);
13416 }
13417 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013418 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013419}
13420
13421/**
13422 * xmlCtxtReadDoc:
13423 * @ctxt: an XML parser context
13424 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013425 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013426 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013427 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013428 *
13429 * parse an XML in-memory document and build a tree.
13430 * This reuses the existing @ctxt parser context
13431 *
13432 * Returns the resulting document tree
13433 */
13434xmlDocPtr
13435xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013436 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013437{
13438 xmlParserInputPtr stream;
13439
13440 if (cur == NULL)
13441 return (NULL);
13442 if (ctxt == NULL)
13443 return (NULL);
13444
13445 xmlCtxtReset(ctxt);
13446
13447 stream = xmlNewStringInputStream(ctxt, cur);
13448 if (stream == NULL) {
13449 return (NULL);
13450 }
13451 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013452 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013453}
13454
13455/**
13456 * xmlCtxtReadFile:
13457 * @ctxt: an XML parser context
13458 * @filename: a file or URL
13459 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013460 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013461 *
13462 * parse an XML file from the filesystem or the network.
13463 * This reuses the existing @ctxt parser context
13464 *
13465 * Returns the resulting document tree
13466 */
13467xmlDocPtr
13468xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13469 const char *encoding, int options)
13470{
13471 xmlParserInputPtr stream;
13472
13473 if (filename == NULL)
13474 return (NULL);
13475 if (ctxt == NULL)
13476 return (NULL);
13477
13478 xmlCtxtReset(ctxt);
13479
Daniel Veillard29614c72004-11-26 10:47:26 +000013480 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013481 if (stream == NULL) {
13482 return (NULL);
13483 }
13484 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013485 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013486}
13487
13488/**
13489 * xmlCtxtReadMemory:
13490 * @ctxt: an XML parser context
13491 * @buffer: a pointer to a char array
13492 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013493 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013494 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013495 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013496 *
13497 * parse an XML in-memory document and build a tree.
13498 * This reuses the existing @ctxt parser context
13499 *
13500 * Returns the resulting document tree
13501 */
13502xmlDocPtr
13503xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013504 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013505{
13506 xmlParserInputBufferPtr input;
13507 xmlParserInputPtr stream;
13508
13509 if (ctxt == NULL)
13510 return (NULL);
13511 if (buffer == NULL)
13512 return (NULL);
13513
13514 xmlCtxtReset(ctxt);
13515
13516 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13517 if (input == NULL) {
13518 return(NULL);
13519 }
13520
13521 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13522 if (stream == NULL) {
13523 xmlFreeParserInputBuffer(input);
13524 return(NULL);
13525 }
13526
13527 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013528 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013529}
13530
13531/**
13532 * xmlCtxtReadFd:
13533 * @ctxt: an XML parser context
13534 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013535 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013536 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013537 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013538 *
13539 * parse an XML from a file descriptor and build a tree.
13540 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013541 * NOTE that the file descriptor will not be closed when the
13542 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013543 *
13544 * Returns the resulting document tree
13545 */
13546xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013547xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13548 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013549{
13550 xmlParserInputBufferPtr input;
13551 xmlParserInputPtr stream;
13552
13553 if (fd < 0)
13554 return (NULL);
13555 if (ctxt == NULL)
13556 return (NULL);
13557
13558 xmlCtxtReset(ctxt);
13559
13560
13561 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13562 if (input == NULL)
13563 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013564 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013565 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13566 if (stream == NULL) {
13567 xmlFreeParserInputBuffer(input);
13568 return (NULL);
13569 }
13570 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013571 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013572}
13573
13574/**
13575 * xmlCtxtReadIO:
13576 * @ctxt: an XML parser context
13577 * @ioread: an I/O read function
13578 * @ioclose: an I/O close function
13579 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013580 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013581 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013582 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013583 *
13584 * parse an XML document from I/O functions and source and build a tree.
13585 * This reuses the existing @ctxt parser context
13586 *
13587 * Returns the resulting document tree
13588 */
13589xmlDocPtr
13590xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13591 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013592 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013593 const char *encoding, int options)
13594{
13595 xmlParserInputBufferPtr input;
13596 xmlParserInputPtr stream;
13597
13598 if (ioread == NULL)
13599 return (NULL);
13600 if (ctxt == NULL)
13601 return (NULL);
13602
13603 xmlCtxtReset(ctxt);
13604
13605 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13606 XML_CHAR_ENCODING_NONE);
13607 if (input == NULL)
13608 return (NULL);
13609 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13610 if (stream == NULL) {
13611 xmlFreeParserInputBuffer(input);
13612 return (NULL);
13613 }
13614 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013615 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013616}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013617
13618#define bottom_parser
13619#include "elfgcchack.h"