blob: ceb91d2e3da9d6f968ac498c2f9e9cf32397cb7e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c.
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the depth of the XML documents accepted for processing.
 * The bound is arbitrary: it does not reflect a limitation of the parser
 * and only exists as a safety boundary.
 */
unsigned int xmlParserMaxDepth = 1024;

/* Sizes of the small and of the big parser buffers. */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

#define SAX2 1

/* Marker string for the SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

98
/*
 * NULL-terminated list of the XML prefixed PI targets allowed by the
 * W3C specs. Both the strings and the table itself are read-only, so
 * the array is declared const as well as the characters it points to.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
 * Returns a non-zero value if the feature exists; returns zero (0) if the
 * feature does not exist or an unknown feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare against ASCII based substrings.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
Daniel Veillardfdc91562002-07-01 21:52:03 +00001492#define RAW (*ctxt->input->cur)
1493#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +00001494#define NXT(val) ctxt->input->cur[(val)]
1495#define CUR_PTR ctxt->input->cur
1496
Daniel Veillarda07050d2003-10-19 14:46:32 +00001497#define CMP4( s, c1, c2, c3, c4 ) \
1498 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1499 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1500#define CMP5( s, c1, c2, c3, c4, c5 ) \
1501 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1502#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1503 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1504#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1505 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1506#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1507 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1508#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1509 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1510 ((unsigned char *) s)[ 8 ] == c9 )
1511#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1512 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1513 ((unsigned char *) s)[ 9 ] == c10 )
1514
Owen Taylor3473f882001-02-23 17:55:21 +00001515#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001516 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001518 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +00001519 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1520 xmlPopInput(ctxt); \
1521 } while (0)
1522
Daniel Veillard0b787f32004-03-26 17:29:53 +00001523#define SKIPL(val) do { \
1524 int skipl; \
1525 for(skipl=0; skipl<val; skipl++) { \
1526 if (*(ctxt->input->cur) == '\n') { \
1527 ctxt->input->line++; ctxt->input->col = 1; \
1528 } else ctxt->input->col++; \
1529 ctxt->nbChars++; \
1530 ctxt->input->cur++; \
1531 } \
1532 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1533 if ((*ctxt->input->cur == 0) && \
1534 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1535 xmlPopInput(ctxt); \
1536 } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
1561#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1562
1563#define NEXT xmlNextChar(ctxt)
1564
Daniel Veillard21a0f912001-02-25 19:54:14 +00001565#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001566 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001567 ctxt->input->cur++; \
1568 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001569 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001570 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1571 }
1572
Owen Taylor3473f882001-02-23 17:55:21 +00001573#define NEXTL(l) do { \
1574 if (*(ctxt->input->cur) == '\n') { \
1575 ctxt->input->line++; ctxt->input->col = 1; \
1576 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001577 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001578 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001579 } while (0)
1580
1581#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1582#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1583
1584#define COPY_BUF(l,b,i,v) \
1585 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001586 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
2136/*
2137 * Macro used to grow the current buffer.
2138 */
2139#define growBuffer(buffer) { \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002140 xmlChar *tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002141 buffer##_size *= 2; \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002142 tmp = (xmlChar *) \
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002144 if (tmp == NULL) goto mem_error; \
2145 buffer = tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002146}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1))
Owen Taylor3473f882001-02-23 17:55:21 +00002366 return(0);
2367
2368 /*
2369 * Check that the string is made of blanks
2370 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002371 if (blank_chars == 0) {
2372 for (i = 0;i < len;i++)
2373 if (!(IS_BLANK_CH(str[i]))) return(0);
2374 }
Owen Taylor3473f882001-02-23 17:55:21 +00002375
2376 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002377 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002378 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002379 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002380 if (ctxt->myDoc != NULL) {
2381 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2382 if (ret == 0) return(1);
2383 if (ret == 1) return(0);
2384 }
2385
2386 /*
2387 * Otherwise, heuristic :-\
2388 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002389 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002390 if ((ctxt->node->children == NULL) &&
2391 (RAW == '<') && (NXT(1) == '/')) return(0);
2392
2393 lastChild = xmlGetLastChild(ctxt->node);
2394 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002395 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2396 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002397 } else if (xmlNodeIsText(lastChild))
2398 return(0);
2399 else if ((ctxt->node->children != NULL) &&
2400 (xmlNodeIsText(ctxt->node->children)))
2401 return(0);
2402 return(1);
2403}
2404
Owen Taylor3473f882001-02-23 17:55:21 +00002405/************************************************************************
2406 * *
2407 * Extra stuff for namespace support *
2408 * Relates to http://www.w3.org/TR/WD-xml-names *
2409 * *
2410 ************************************************************************/
2411
2412/**
2413 * xmlSplitQName:
2414 * @ctxt: an XML parser context
2415 * @name: an XML parser context
2416 * @prefix: a xmlChar **
2417 *
2418 * parse an UTF8 encoded XML qualified name string
2419 *
2420 * [NS 5] QName ::= (Prefix ':')? LocalPart
2421 *
2422 * [NS 6] Prefix ::= NCName
2423 *
2424 * [NS 7] LocalPart ::= NCName
2425 *
2426 * Returns the local part, and prefix is updated
2427 * to get the Prefix if any.
2428 */
2429
2430xmlChar *
2431xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2432 xmlChar buf[XML_MAX_NAMELEN + 5];
2433 xmlChar *buffer = NULL;
2434 int len = 0;
2435 int max = XML_MAX_NAMELEN;
2436 xmlChar *ret = NULL;
2437 const xmlChar *cur = name;
2438 int c;
2439
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002440 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002441 *prefix = NULL;
2442
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002443 if (cur == NULL) return(NULL);
2444
Owen Taylor3473f882001-02-23 17:55:21 +00002445#ifndef XML_XML_NAMESPACE
2446 /* xml: prefix is not really a namespace */
2447 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2448 (cur[2] == 'l') && (cur[3] == ':'))
2449 return(xmlStrdup(name));
2450#endif
2451
Daniel Veillard597bc482003-07-24 16:08:28 +00002452 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (cur[0] == ':')
2454 return(xmlStrdup(name));
2455
2456 c = *cur++;
2457 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2458 buf[len++] = c;
2459 c = *cur++;
2460 }
2461 if (len >= max) {
2462 /*
2463 * Okay someone managed to make a huge name, so he's ready to pay
2464 * for the processing speed.
2465 */
2466 max = len * 2;
2467
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002468 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002469 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002470 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002471 return(NULL);
2472 }
2473 memcpy(buffer, buf, len);
2474 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2475 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002476 xmlChar *tmp;
2477
Owen Taylor3473f882001-02-23 17:55:21 +00002478 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002479 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002480 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002481 if (tmp == NULL) {
2482 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002483 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002484 return(NULL);
2485 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002486 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002487 }
2488 buffer[len++] = c;
2489 c = *cur++;
2490 }
2491 buffer[len] = 0;
2492 }
2493
Daniel Veillard597bc482003-07-24 16:08:28 +00002494 /* nasty but well=formed
2495 if ((c == ':') && (*cur == 0)) {
2496 return(xmlStrdup(name));
2497 } */
2498
Owen Taylor3473f882001-02-23 17:55:21 +00002499 if (buffer == NULL)
2500 ret = xmlStrndup(buf, len);
2501 else {
2502 ret = buffer;
2503 buffer = NULL;
2504 max = XML_MAX_NAMELEN;
2505 }
2506
2507
2508 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002509 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002510 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002511 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002512 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002513 }
Owen Taylor3473f882001-02-23 17:55:21 +00002514 len = 0;
2515
Daniel Veillardbb284f42002-10-16 18:02:47 +00002516 /*
2517 * Check that the first character is proper to start
2518 * a new name
2519 */
2520 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2521 ((c >= 0x41) && (c <= 0x5A)) ||
2522 (c == '_') || (c == ':'))) {
2523 int l;
2524 int first = CUR_SCHAR(cur, l);
2525
2526 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002527 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002528 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002529 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002530 }
2531 }
2532 cur++;
2533
Owen Taylor3473f882001-02-23 17:55:21 +00002534 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2535 buf[len++] = c;
2536 c = *cur++;
2537 }
2538 if (len >= max) {
2539 /*
2540 * Okay someone managed to make a huge name, so he's ready to pay
2541 * for the processing speed.
2542 */
2543 max = len * 2;
2544
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002546 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002547 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002548 return(NULL);
2549 }
2550 memcpy(buffer, buf, len);
2551 while (c != 0) { /* tested bigname2.xml */
2552 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002553 xmlChar *tmp;
2554
Owen Taylor3473f882001-02-23 17:55:21 +00002555 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002557 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002558 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002559 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002560 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002561 return(NULL);
2562 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002564 }
2565 buffer[len++] = c;
2566 c = *cur++;
2567 }
2568 buffer[len] = 0;
2569 }
2570
2571 if (buffer == NULL)
2572 ret = xmlStrndup(buf, len);
2573 else {
2574 ret = buffer;
2575 }
2576 }
2577
2578 return(ret);
2579}
2580
2581/************************************************************************
2582 * *
2583 * The parser itself *
2584 * Relates to http://www.w3.org/TR/REC-xml *
2585 * *
2586 ************************************************************************/
2587
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002588static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002589static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002590 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002591
Owen Taylor3473f882001-02-23 17:55:21 +00002592/**
2593 * xmlParseName:
2594 * @ctxt: an XML parser context
2595 *
2596 * parse an XML name.
2597 *
2598 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2599 * CombiningChar | Extender
2600 *
2601 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2602 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002603 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002604 *
2605 * Returns the Name parsed or NULL
2606 */
2607
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002608const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002609xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002610 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002612 int count = 0;
2613
2614 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002615
2616 /*
2617 * Accelerator for simple ASCII names
2618 */
2619 in = ctxt->input->cur;
2620 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2621 ((*in >= 0x41) && (*in <= 0x5A)) ||
2622 (*in == '_') || (*in == ':')) {
2623 in++;
2624 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2625 ((*in >= 0x41) && (*in <= 0x5A)) ||
2626 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002627 (*in == '_') || (*in == '-') ||
2628 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002629 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002631 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002632 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002633 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002634 ctxt->nbChars += count;
2635 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002636 if (ret == NULL)
2637 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002638 return(ret);
2639 }
2640 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002641 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002642}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002643
Daniel Veillard46de64e2002-05-29 08:21:33 +00002644/**
2645 * xmlParseNameAndCompare:
2646 * @ctxt: an XML parser context
2647 *
2648 * parse an XML name and compares for match
2649 * (specialized for endtag parsing)
2650 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002651 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2652 * and the name for mismatch
2653 */
2654
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002655static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002656xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002657 register const xmlChar *cmp = other;
2658 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002659 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002660
2661 GROW;
2662
2663 in = ctxt->input->cur;
2664 while (*in != 0 && *in == *cmp) {
2665 ++in;
2666 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002667 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002668 }
William M. Brack76e95df2003-10-18 16:20:14 +00002669 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002670 /* success */
2671 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002672 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 }
2674 /* failure (or end of input buffer), check with full function */
2675 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002676 /* strings coming from the dictionnary direct compare possible */
2677 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002679 }
2680 return ret;
2681}
2682
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002683static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002684xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002685 int len = 0, l;
2686 int c;
2687 int count = 0;
2688
2689 /*
2690 * Handler for more complex cases
2691 */
2692 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 c = CUR_CHAR(l);
2694 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2695 (!IS_LETTER(c) && (c != '_') &&
2696 (c != ':'))) {
2697 return(NULL);
2698 }
2699
2700 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002701 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002702 (c == '.') || (c == '-') ||
2703 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002704 (IS_COMBINING(c)) ||
2705 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002706 if (count++ > 100) {
2707 count = 0;
2708 GROW;
2709 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002710 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002711 NEXTL(l);
2712 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002713 }
Daniel Veillard96688262005-08-23 18:14:12 +00002714 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2715 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002716 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002717}
2718
2719/**
2720 * xmlParseStringName:
2721 * @ctxt: an XML parser context
2722 * @str: a pointer to the string pointer (IN/OUT)
2723 *
2724 * parse an XML name.
2725 *
2726 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2727 * CombiningChar | Extender
2728 *
2729 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2730 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002731 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002732 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002733 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002734 * is updated to the current location in the string.
2735 */
2736
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002737static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002738xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2739 xmlChar buf[XML_MAX_NAMELEN + 5];
2740 const xmlChar *cur = *str;
2741 int len = 0, l;
2742 int c;
2743
2744 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002745 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002746 (c != ':')) {
2747 return(NULL);
2748 }
2749
William M. Brack871611b2003-10-18 04:53:14 +00002750 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002751 (c == '.') || (c == '-') ||
2752 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002753 (IS_COMBINING(c)) ||
2754 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002755 COPY_BUF(l,buf,len,c);
2756 cur += l;
2757 c = CUR_SCHAR(cur, l);
2758 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2759 /*
2760 * Okay someone managed to make a huge name, so he's ready to pay
2761 * for the processing speed.
2762 */
2763 xmlChar *buffer;
2764 int max = len * 2;
2765
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002766 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002767 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002768 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002769 return(NULL);
2770 }
2771 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002772 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002773 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002774 (c == '.') || (c == '-') ||
2775 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002776 (IS_COMBINING(c)) ||
2777 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002778 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002779 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002780 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002781 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002782 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002783 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002784 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002785 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002786 return(NULL);
2787 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002789 }
2790 COPY_BUF(l,buffer,len,c);
2791 cur += l;
2792 c = CUR_SCHAR(cur, l);
2793 }
2794 buffer[len] = 0;
2795 *str = cur;
2796 return(buffer);
2797 }
2798 }
2799 *str = cur;
2800 return(xmlStrndup(buf, len));
2801}
2802
2803/**
2804 * xmlParseNmtoken:
2805 * @ctxt: an XML parser context
2806 *
2807 * parse an XML Nmtoken.
2808 *
2809 * [7] Nmtoken ::= (NameChar)+
2810 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002811 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002812 *
2813 * Returns the Nmtoken parsed or NULL
2814 */
2815
2816xmlChar *
2817xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2818 xmlChar buf[XML_MAX_NAMELEN + 5];
2819 int len = 0, l;
2820 int c;
2821 int count = 0;
2822
2823 GROW;
2824 c = CUR_CHAR(l);
2825
William M. Brack871611b2003-10-18 04:53:14 +00002826 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002827 (c == '.') || (c == '-') ||
2828 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002829 (IS_COMBINING(c)) ||
2830 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002831 if (count++ > 100) {
2832 count = 0;
2833 GROW;
2834 }
2835 COPY_BUF(l,buf,len,c);
2836 NEXTL(l);
2837 c = CUR_CHAR(l);
2838 if (len >= XML_MAX_NAMELEN) {
2839 /*
2840 * Okay someone managed to make a huge token, so he's ready to pay
2841 * for the processing speed.
2842 */
2843 xmlChar *buffer;
2844 int max = len * 2;
2845
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002846 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002847 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002848 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002849 return(NULL);
2850 }
2851 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002852 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002853 (c == '.') || (c == '-') ||
2854 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002855 (IS_COMBINING(c)) ||
2856 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002857 if (count++ > 100) {
2858 count = 0;
2859 GROW;
2860 }
2861 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002862 xmlChar *tmp;
2863
Owen Taylor3473f882001-02-23 17:55:21 +00002864 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002866 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002867 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002868 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002869 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002870 return(NULL);
2871 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002873 }
2874 COPY_BUF(l,buffer,len,c);
2875 NEXTL(l);
2876 c = CUR_CHAR(l);
2877 }
2878 buffer[len] = 0;
2879 return(buffer);
2880 }
2881 }
2882 if (len == 0)
2883 return(NULL);
2884 return(xmlStrndup(buf, len));
2885}
2886
2887/**
2888 * xmlParseEntityValue:
2889 * @ctxt: an XML parser context
2890 * @orig: if non-NULL store a copy of the original entity value
2891 *
2892 * parse a value for ENTITY declarations
2893 *
2894 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2895 * "'" ([^%&'] | PEReference | Reference)* "'"
2896 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002897 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002898 */
2899
2900xmlChar *
2901xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2902 xmlChar *buf = NULL;
2903 int len = 0;
2904 int size = XML_PARSER_BUFFER_SIZE;
2905 int c, l;
2906 xmlChar stop;
2907 xmlChar *ret = NULL;
2908 const xmlChar *cur = NULL;
2909 xmlParserInputPtr input;
2910
2911 if (RAW == '"') stop = '"';
2912 else if (RAW == '\'') stop = '\'';
2913 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002914 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 return(NULL);
2916 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002917 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002918 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002919 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002920 return(NULL);
2921 }
2922
2923 /*
2924 * The content of the entity definition is copied in a buffer.
2925 */
2926
2927 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2928 input = ctxt->input;
2929 GROW;
2930 NEXT;
2931 c = CUR_CHAR(l);
2932 /*
2933 * NOTE: 4.4.5 Included in Literal
2934 * When a parameter entity reference appears in a literal entity
2935 * value, ... a single or double quote character in the replacement
2936 * text is always treated as a normal data character and will not
2937 * terminate the literal.
2938 * In practice it means we stop the loop only when back at parsing
2939 * the initial entity and the quote is found
2940 */
William M. Brack871611b2003-10-18 04:53:14 +00002941 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002942 (ctxt->input != input))) {
2943 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002944 xmlChar *tmp;
2945
Owen Taylor3473f882001-02-23 17:55:21 +00002946 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2948 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002949 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002951 return(NULL);
2952 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002954 }
2955 COPY_BUF(l,buf,len,c);
2956 NEXTL(l);
2957 /*
2958 * Pop-up of finished entities.
2959 */
2960 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2961 xmlPopInput(ctxt);
2962
2963 GROW;
2964 c = CUR_CHAR(l);
2965 if (c == 0) {
2966 GROW;
2967 c = CUR_CHAR(l);
2968 }
2969 }
2970 buf[len] = 0;
2971
2972 /*
2973 * Raise problem w.r.t. '&' and '%' being used in non-entities
2974 * reference constructs. Note Charref will be handled in
2975 * xmlStringDecodeEntities()
2976 */
2977 cur = buf;
2978 while (*cur != 0) { /* non input consuming */
2979 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2980 xmlChar *name;
2981 xmlChar tmp = *cur;
2982
2983 cur++;
2984 name = xmlParseStringName(ctxt, &cur);
2985 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002986 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002987 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002988 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002989 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002990 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2991 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002992 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002993 }
2994 if (name != NULL)
2995 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002996 if (*cur == 0)
2997 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002998 }
2999 cur++;
3000 }
3001
3002 /*
3003 * Then PEReference entities are substituted.
3004 */
3005 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003006 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003007 xmlFree(buf);
3008 } else {
3009 NEXT;
3010 /*
3011 * NOTE: 4.4.7 Bypassed
3012 * When a general entity reference appears in the EntityValue in
3013 * an entity declaration, it is bypassed and left as is.
3014 * so XML_SUBSTITUTE_REF is not set here.
3015 */
3016 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3017 0, 0, 0);
3018 if (orig != NULL)
3019 *orig = buf;
3020 else
3021 xmlFree(buf);
3022 }
3023
3024 return(ret);
3025}
3026
3027/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003028 * xmlParseAttValueComplex:
3029 * @ctxt: an XML parser context
 * @attlen:  the resulting attribute len
 * @normalize:  whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003032 *
3033 * parse a value for an attribute, this is the fallback function
3034 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003036 *
3037 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3038 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003039static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003040xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003041 xmlChar limit = 0;
3042 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003043 int len = 0;
3044 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003045 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 xmlChar *current = NULL;
3047 xmlEntityPtr ent;
3048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (NXT(0) == '"') {
3050 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3051 limit = '"';
3052 NEXT;
3053 } else if (NXT(0) == '\'') {
3054 limit = '\'';
3055 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3056 NEXT;
3057 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003058 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003059 return(NULL);
3060 }
3061
3062 /*
3063 * allocate a translation buffer.
3064 */
3065 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003066 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003067 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003068
3069 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003070 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003071 */
3072 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003073 while ((NXT(0) != limit) && /* checked */
3074 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003075 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003077 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (NXT(1) == '#') {
3079 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003080
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003082 if (ctxt->replaceEntities) {
3083 if (len > buf_size - 10) {
3084 growBuffer(buf);
3085 }
3086 buf[len++] = '&';
3087 } else {
3088 /*
3089 * The reparsing will be done in xmlStringGetNodeList()
3090 * called by the attribute() function in SAX.c
3091 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003092 if (len > buf_size - 10) {
3093 growBuffer(buf);
3094 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003095 buf[len++] = '&';
3096 buf[len++] = '#';
3097 buf[len++] = '3';
3098 buf[len++] = '8';
3099 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003100 }
3101 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003102 if (len > buf_size - 10) {
3103 growBuffer(buf);
3104 }
Owen Taylor3473f882001-02-23 17:55:21 +00003105 len += xmlCopyChar(0, &buf[len], val);
3106 }
3107 } else {
3108 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003109 if ((ent != NULL) &&
3110 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
3114 if ((ctxt->replaceEntities == 0) &&
3115 (ent->content[0] == '&')) {
3116 buf[len++] = '&';
3117 buf[len++] = '#';
3118 buf[len++] = '3';
3119 buf[len++] = '8';
3120 buf[len++] = ';';
3121 } else {
3122 buf[len++] = ent->content[0];
3123 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003124 } else if ((ent != NULL) &&
3125 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003126 xmlChar *rep;
3127
3128 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3129 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003130 XML_SUBSTITUTE_REF,
3131 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003132 if (rep != NULL) {
3133 current = rep;
3134 while (*current != 0) { /* non input consuming */
3135 buf[len++] = *current++;
3136 if (len > buf_size - 10) {
3137 growBuffer(buf);
3138 }
3139 }
3140 xmlFree(rep);
3141 }
3142 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003143 if (len > buf_size - 10) {
3144 growBuffer(buf);
3145 }
Owen Taylor3473f882001-02-23 17:55:21 +00003146 if (ent->content != NULL)
3147 buf[len++] = ent->content[0];
3148 }
3149 } else if (ent != NULL) {
3150 int i = xmlStrlen(ent->name);
3151 const xmlChar *cur = ent->name;
3152
3153 /*
3154 * This may look absurd but is needed to detect
3155 * entities problems
3156 */
3157 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3158 (ent->content != NULL)) {
3159 xmlChar *rep;
3160 rep = xmlStringDecodeEntities(ctxt, ent->content,
3161 XML_SUBSTITUTE_REF, 0, 0, 0);
3162 if (rep != NULL)
3163 xmlFree(rep);
3164 }
3165
3166 /*
3167 * Just output the reference
3168 */
3169 buf[len++] = '&';
3170 if (len > buf_size - i - 10) {
3171 growBuffer(buf);
3172 }
3173 for (;i > 0;i--)
3174 buf[len++] = *cur++;
3175 buf[len++] = ';';
3176 }
3177 }
3178 } else {
3179 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003180 if ((len != 0) || (!normalize)) {
3181 if ((!normalize) || (!in_space)) {
3182 COPY_BUF(l,buf,len,0x20);
3183 if (len > buf_size - 10) {
3184 growBuffer(buf);
3185 }
3186 }
3187 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003188 }
3189 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003190 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 COPY_BUF(l,buf,len,c);
3192 if (len > buf_size - 10) {
3193 growBuffer(buf);
3194 }
3195 }
3196 NEXTL(l);
3197 }
3198 GROW;
3199 c = CUR_CHAR(l);
3200 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003201 if ((in_space) && (normalize)) {
3202 while (buf[len - 1] == 0x20) len--;
3203 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003204 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003205 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003206 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003207 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003208 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3209 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else
3211 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003212 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003213 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003214
3215mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003216 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003217 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003218}
3219
3220/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003221 * xmlParseAttValue:
3222 * @ctxt: an XML parser context
3223 *
3224 * parse a value for an attribute
3225 * Note: the parser won't do substitution of entities here, this
3226 * will be handled later in xmlStringGetNodeList
3227 *
3228 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3229 * "'" ([^<&'] | Reference)* "'"
3230 *
3231 * 3.3.3 Attribute-Value Normalization:
3232 * Before the value of an attribute is passed to the application or
3233 * checked for validity, the XML processor must normalize it as follows:
3234 * - a character reference is processed by appending the referenced
3235 * character to the attribute value
3236 * - an entity reference is processed by recursively processing the
3237 * replacement text of the entity
3238 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3239 * appending #x20 to the normalized value, except that only a single
3240 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3241 * parsed entity or the literal entity value of an internal parsed entity
3242 * - other characters are processed by appending them to the normalized value
3243 * If the declared value is not CDATA, then the XML processor must further
3244 * process the normalized attribute value by discarding any leading and
3245 * trailing space (#x20) characters, and by replacing sequences of space
3246 * (#x20) characters by a single space (#x20) character.
3247 * All attributes for which no declaration has been read should be treated
3248 * by a non-validating parser as if declared CDATA.
3249 *
3250 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3251 */
3252
3253
3254xmlChar *
3255xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003256 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003257 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003258}
3259
3260/**
Owen Taylor3473f882001-02-23 17:55:21 +00003261 * xmlParseSystemLiteral:
3262 * @ctxt: an XML parser context
3263 *
3264 * parse an XML Literal
3265 *
3266 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3267 *
3268 * Returns the SystemLiteral parsed or NULL
3269 */
3270
3271xmlChar *
3272xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3273 xmlChar *buf = NULL;
3274 int len = 0;
3275 int size = XML_PARSER_BUFFER_SIZE;
3276 int cur, l;
3277 xmlChar stop;
3278 int state = ctxt->instate;
3279 int count = 0;
3280
3281 SHRINK;
3282 if (RAW == '"') {
3283 NEXT;
3284 stop = '"';
3285 } else if (RAW == '\'') {
3286 NEXT;
3287 stop = '\'';
3288 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003289 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003290 return(NULL);
3291 }
3292
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003293 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003294 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003295 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003296 return(NULL);
3297 }
3298 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3299 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003300 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003301 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003302 xmlChar *tmp;
3303
Owen Taylor3473f882001-02-23 17:55:21 +00003304 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003305 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3306 if (tmp == NULL) {
3307 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003308 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003309 ctxt->instate = (xmlParserInputState) state;
3310 return(NULL);
3311 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003312 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003313 }
3314 count++;
3315 if (count > 50) {
3316 GROW;
3317 count = 0;
3318 }
3319 COPY_BUF(l,buf,len,cur);
3320 NEXTL(l);
3321 cur = CUR_CHAR(l);
3322 if (cur == 0) {
3323 GROW;
3324 SHRINK;
3325 cur = CUR_CHAR(l);
3326 }
3327 }
3328 buf[len] = 0;
3329 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003330 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003331 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003332 } else {
3333 NEXT;
3334 }
3335 return(buf);
3336}
3337
3338/**
3339 * xmlParsePubidLiteral:
3340 * @ctxt: an XML parser context
3341 *
3342 * parse an XML public literal
3343 *
3344 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3345 *
3346 * Returns the PubidLiteral parsed or NULL.
3347 */
3348
3349xmlChar *
3350xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3351 xmlChar *buf = NULL;
3352 int len = 0;
3353 int size = XML_PARSER_BUFFER_SIZE;
3354 xmlChar cur;
3355 xmlChar stop;
3356 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003357 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003358
3359 SHRINK;
3360 if (RAW == '"') {
3361 NEXT;
3362 stop = '"';
3363 } else if (RAW == '\'') {
3364 NEXT;
3365 stop = '\'';
3366 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003367 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003368 return(NULL);
3369 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003370 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003371 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003372 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003373 return(NULL);
3374 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003375 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003376 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003377 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003378 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003379 xmlChar *tmp;
3380
Owen Taylor3473f882001-02-23 17:55:21 +00003381 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3383 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003384 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003385 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003386 return(NULL);
3387 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003388 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 buf[len++] = cur;
3391 count++;
3392 if (count > 50) {
3393 GROW;
3394 count = 0;
3395 }
3396 NEXT;
3397 cur = CUR;
3398 if (cur == 0) {
3399 GROW;
3400 SHRINK;
3401 cur = CUR;
3402 }
3403 }
3404 buf[len] = 0;
3405 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003406 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003407 } else {
3408 NEXT;
3409 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003410 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003411 return(buf);
3412}
3413
Daniel Veillard48b2f892001-02-25 16:11:03 +00003414void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003415
/*
 * Lookup table used by the inner loop of the character data fast path.
 * An entry is non-zero (equal to its own index) when the byte can be
 * skipped over directly: TAB and the ASCII range 0x20-0x7F except
 * '&', '<' and ']'.  Everything else, including LF, CR and all bytes
 * >= 0x80, is zero and stops the scan.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted, LF and CR are not */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero initialization */
};
3453
Owen Taylor3473f882001-02-23 17:55:21 +00003454/**
3455 * xmlParseCharData:
3456 * @ctxt: an XML parser context
3457 * @cdata: int indicating whether we are within a CDATA section
3458 *
3459 * parse a CharData section.
3460 * if we are within a CDATA section ']]>' marks an end of section.
3461 *
3462 * The right angle bracket (>) may be represented using the string "&gt;",
3463 * and must, for compatibility, be escaped using "&gt;" or a character
3464 * reference when it appears in the string "]]>" in content, when that
3465 * string is not marking the end of a CDATA section.
3466 *
3467 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3468 */
3469
3470void
3471xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003472 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003473 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003474 int line = ctxt->input->line;
3475 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003476 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003477
3478 SHRINK;
3479 GROW;
3480 /*
3481 * Accelerated common case where input don't need to be
3482 * modified before passing it to the handler.
3483 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003484 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003485 in = ctxt->input->cur;
3486 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003487get_more_space:
3488 while (*in == 0x20) in++;
3489 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003490 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003491 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003492 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003493 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003494 goto get_more_space;
3495 }
3496 if (*in == '<') {
3497 nbchar = in - ctxt->input->cur;
3498 if (nbchar > 0) {
3499 const xmlChar *tmp = ctxt->input->cur;
3500 ctxt->input->cur = in;
3501
Daniel Veillard34099b42004-11-04 17:34:35 +00003502 if ((ctxt->sax != NULL) &&
3503 (ctxt->sax->ignorableWhitespace !=
3504 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003505 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003506 if (ctxt->sax->ignorableWhitespace != NULL)
3507 ctxt->sax->ignorableWhitespace(ctxt->userData,
3508 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003509 } else if (ctxt->sax->characters != NULL)
3510 ctxt->sax->characters(ctxt->userData,
3511 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003512 } else if ((ctxt->sax != NULL) &&
3513 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003514 ctxt->sax->characters(ctxt->userData,
3515 tmp, nbchar);
3516 }
3517 }
3518 return;
3519 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003520
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003521get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003522 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003523 while (test_char_data[*in]) {
3524 in++;
3525 ccol++;
3526 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003527 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003528 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003529 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003530 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003531 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003532 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003533 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003534 }
3535 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003536 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003537 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003538 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003539 return;
3540 }
3541 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003542 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003543 goto get_more;
3544 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003545 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003546 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003547 if ((ctxt->sax != NULL) &&
3548 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003549 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003550 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003551 const xmlChar *tmp = ctxt->input->cur;
3552 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003553
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003554 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003555 if (ctxt->sax->ignorableWhitespace != NULL)
3556 ctxt->sax->ignorableWhitespace(ctxt->userData,
3557 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003558 } else if (ctxt->sax->characters != NULL)
3559 ctxt->sax->characters(ctxt->userData,
3560 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003561 line = ctxt->input->line;
3562 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003563 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003564 if (ctxt->sax->characters != NULL)
3565 ctxt->sax->characters(ctxt->userData,
3566 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003567 line = ctxt->input->line;
3568 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003569 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003570 }
3571 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003572 if (*in == 0xD) {
3573 in++;
3574 if (*in == 0xA) {
3575 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003576 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003577 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003578 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003579 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003580 in--;
3581 }
3582 if (*in == '<') {
3583 return;
3584 }
3585 if (*in == '&') {
3586 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003587 }
3588 SHRINK;
3589 GROW;
3590 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003591 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003592 nbchar = 0;
3593 }
Daniel Veillard50582112001-03-26 22:52:16 +00003594 ctxt->input->line = line;
3595 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003596 xmlParseCharDataComplex(ctxt, cdata);
3597}
3598
Daniel Veillard01c13b52002-12-10 15:19:08 +00003599/**
3600 * xmlParseCharDataComplex:
3601 * @ctxt: an XML parser context
3602 * @cdata: int indicating whether we are within a CDATA section
3603 *
3604 * parse a CharData section.this is the fallback function
3605 * of xmlParseCharData() when the parsing requires handling
3606 * of non-ASCII characters.
3607 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003608void
3609xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003610 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3611 int nbchar = 0;
3612 int cur, l;
3613 int count = 0;
3614
3615 SHRINK;
3616 GROW;
3617 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003618 while ((cur != '<') && /* checked */
3619 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003620 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 if ((cur == ']') && (NXT(1) == ']') &&
3622 (NXT(2) == '>')) {
3623 if (cdata) break;
3624 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003625 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003626 }
3627 }
3628 COPY_BUF(l,buf,nbchar,cur);
3629 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003630 buf[nbchar] = 0;
3631
Owen Taylor3473f882001-02-23 17:55:21 +00003632 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003633 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003634 */
3635 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003636 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003637 if (ctxt->sax->ignorableWhitespace != NULL)
3638 ctxt->sax->ignorableWhitespace(ctxt->userData,
3639 buf, nbchar);
3640 } else {
3641 if (ctxt->sax->characters != NULL)
3642 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3643 }
3644 }
3645 nbchar = 0;
3646 }
3647 count++;
3648 if (count > 50) {
3649 GROW;
3650 count = 0;
3651 }
3652 NEXTL(l);
3653 cur = CUR_CHAR(l);
3654 }
3655 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003656 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003658 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003659 */
3660 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003661 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003662 if (ctxt->sax->ignorableWhitespace != NULL)
3663 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3664 } else {
3665 if (ctxt->sax->characters != NULL)
3666 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3667 }
3668 }
3669 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003670 if ((cur != 0) && (!IS_CHAR(cur))) {
3671 /* Generate the error and skip the offending character */
3672 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3673 "PCDATA invalid Char value %d\n",
3674 cur);
3675 NEXTL(l);
3676 }
Owen Taylor3473f882001-02-23 17:55:21 +00003677}
3678
3679/**
3680 * xmlParseExternalID:
3681 * @ctxt: an XML parser context
3682 * @publicID: a xmlChar** receiving PubidLiteral
3683 * @strict: indicate whether we should restrict parsing to only
3684 * production [75], see NOTE below
3685 *
3686 * Parse an External ID or a Public ID
3687 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003688 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003689 * 'PUBLIC' S PubidLiteral S SystemLiteral
3690 *
3691 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3692 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3693 *
3694 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3695 *
3696 * Returns the function returns SystemLiteral and in the second
3697 * case publicID receives PubidLiteral, is strict is off
3698 * it is possible to return NULL and have publicID set.
3699 */
3700
3701xmlChar *
3702xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3703 xmlChar *URI = NULL;
3704
3705 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003706
3707 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003708 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003709 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003710 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003711 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3712 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
3714 SKIP_BLANKS;
3715 URI = xmlParseSystemLiteral(ctxt);
3716 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003717 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003718 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003719 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003720 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003721 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003722 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003723 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003724 }
3725 SKIP_BLANKS;
3726 *publicID = xmlParsePubidLiteral(ctxt);
3727 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003728 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 if (strict) {
3731 /*
3732 * We don't handle [83] so "S SystemLiteral" is required.
3733 */
William M. Brack76e95df2003-10-18 16:20:14 +00003734 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003736 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738 } else {
3739 /*
3740 * We handle [83] so we return immediately, if
3741 * "S SystemLiteral" is not detected. From a purely parsing
3742 * point of view that's a nice mess.
3743 */
3744 const xmlChar *ptr;
3745 GROW;
3746
3747 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003748 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003749
William M. Brack76e95df2003-10-18 16:20:14 +00003750 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003751 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3752 }
3753 SKIP_BLANKS;
3754 URI = xmlParseSystemLiteral(ctxt);
3755 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003756 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 }
3758 }
3759 return(URI);
3760}
3761
3762/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003763 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003764 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003765 * @buf: the already parsed part of the buffer
3766 * @len: number of bytes filles in the buffer
3767 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003768 *
3769 * Skip an XML (SGML) comment <!-- .... -->
3770 * The spec says that "For compatibility, the string "--" (double-hyphen)
3771 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003772 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003773 *
3774 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3775 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003776static void
3777xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003778 int q, ql;
3779 int r, rl;
3780 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003781 xmlParserInputPtr input = ctxt->input;
3782 int count = 0;
3783
Owen Taylor3473f882001-02-23 17:55:21 +00003784 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003785 len = 0;
3786 size = XML_PARSER_BUFFER_SIZE;
3787 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3788 if (buf == NULL) {
3789 xmlErrMemory(ctxt, NULL);
3790 return;
3791 }
Owen Taylor3473f882001-02-23 17:55:21 +00003792 }
3793 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003794 if (q == 0)
3795 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003796 NEXTL(ql);
3797 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003798 if (r == 0)
3799 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003800 NEXTL(rl);
3801 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003802 if (cur == 0)
3803 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003804 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003805 ((cur != '>') ||
3806 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003807 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003808 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 }
3810 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003811 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003812 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003813 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3814 if (new_buf == NULL) {
3815 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003817 return;
3818 }
William M. Bracka3215c72004-07-31 16:24:01 +00003819 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003820 }
3821 COPY_BUF(ql,buf,len,q);
3822 q = r;
3823 ql = rl;
3824 r = cur;
3825 rl = l;
3826
3827 count++;
3828 if (count > 50) {
3829 GROW;
3830 count = 0;
3831 }
3832 NEXTL(l);
3833 cur = CUR_CHAR(l);
3834 if (cur == 0) {
3835 SHRINK;
3836 GROW;
3837 cur = CUR_CHAR(l);
3838 }
3839 }
3840 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003841 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003842 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003843 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003844 xmlFree(buf);
3845 } else {
3846 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003847 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3848 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003849 }
3850 NEXT;
3851 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3852 (!ctxt->disableSAX))
3853 ctxt->sax->comment(ctxt->userData, buf);
3854 xmlFree(buf);
3855 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003856 return;
3857not_terminated:
3858 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3859 "Comment not terminated\n", NULL);
3860 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003861}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003862/**
3863 * xmlParseComment:
3864 * @ctxt: an XML parser context
3865 *
3866 * Skip an XML (SGML) comment <!-- .... -->
3867 * The spec says that "For compatibility, the string "--" (double-hyphen)
3868 * must not occur within comments. "
3869 *
3870 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3871 */
3872void
3873xmlParseComment(xmlParserCtxtPtr ctxt) {
3874 xmlChar *buf = NULL;
3875 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003876 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003877 xmlParserInputState state;
3878 const xmlChar *in;
3879 int nbchar = 0, ccol;
3880
3881 /*
3882 * Check that there is a comment right here.
3883 */
3884 if ((RAW != '<') || (NXT(1) != '!') ||
3885 (NXT(2) != '-') || (NXT(3) != '-')) return;
3886
3887 state = ctxt->instate;
3888 ctxt->instate = XML_PARSER_COMMENT;
3889 SKIP(4);
3890 SHRINK;
3891 GROW;
3892
3893 /*
3894 * Accelerated common case where input don't need to be
3895 * modified before passing it to the handler.
3896 */
3897 in = ctxt->input->cur;
3898 do {
3899 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003900 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003901 ctxt->input->line++; ctxt->input->col = 1;
3902 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003903 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003904 }
3905get_more:
3906 ccol = ctxt->input->col;
3907 while (((*in > '-') && (*in <= 0x7F)) ||
3908 ((*in >= 0x20) && (*in < '-')) ||
3909 (*in == 0x09)) {
3910 in++;
3911 ccol++;
3912 }
3913 ctxt->input->col = ccol;
3914 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003915 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003916 ctxt->input->line++; ctxt->input->col = 1;
3917 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003918 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003919 goto get_more;
3920 }
3921 nbchar = in - ctxt->input->cur;
3922 /*
3923 * save current set of data
3924 */
3925 if (nbchar > 0) {
3926 if ((ctxt->sax != NULL) &&
3927 (ctxt->sax->comment != NULL)) {
3928 if (buf == NULL) {
3929 if ((*in == '-') && (in[1] == '-'))
3930 size = nbchar + 1;
3931 else
3932 size = XML_PARSER_BUFFER_SIZE + nbchar;
3933 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3934 if (buf == NULL) {
3935 xmlErrMemory(ctxt, NULL);
3936 ctxt->instate = state;
3937 return;
3938 }
3939 len = 0;
3940 } else if (len + nbchar + 1 >= size) {
3941 xmlChar *new_buf;
3942 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3943 new_buf = (xmlChar *) xmlRealloc(buf,
3944 size * sizeof(xmlChar));
3945 if (new_buf == NULL) {
3946 xmlFree (buf);
3947 xmlErrMemory(ctxt, NULL);
3948 ctxt->instate = state;
3949 return;
3950 }
3951 buf = new_buf;
3952 }
3953 memcpy(&buf[len], ctxt->input->cur, nbchar);
3954 len += nbchar;
3955 buf[len] = 0;
3956 }
3957 }
3958 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003959 if (*in == 0xA) {
3960 in++;
3961 ctxt->input->line++; ctxt->input->col = 1;
3962 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003963 if (*in == 0xD) {
3964 in++;
3965 if (*in == 0xA) {
3966 ctxt->input->cur = in;
3967 in++;
3968 ctxt->input->line++; ctxt->input->col = 1;
3969 continue; /* while */
3970 }
3971 in--;
3972 }
3973 SHRINK;
3974 GROW;
3975 in = ctxt->input->cur;
3976 if (*in == '-') {
3977 if (in[1] == '-') {
3978 if (in[2] == '>') {
3979 SKIP(3);
3980 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3981 (!ctxt->disableSAX)) {
3982 if (buf != NULL)
3983 ctxt->sax->comment(ctxt->userData, buf);
3984 else
3985 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3986 }
3987 if (buf != NULL)
3988 xmlFree(buf);
3989 ctxt->instate = state;
3990 return;
3991 }
3992 if (buf != NULL)
3993 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3994 "Comment not terminated \n<!--%.50s\n",
3995 buf);
3996 else
3997 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3998 "Comment not terminated \n", NULL);
3999 in++;
4000 ctxt->input->col++;
4001 }
4002 in++;
4003 ctxt->input->col++;
4004 goto get_more;
4005 }
4006 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4007 xmlParseCommentComplex(ctxt, buf, len, size);
4008 ctxt->instate = state;
4009 return;
4010}
4011
Owen Taylor3473f882001-02-23 17:55:21 +00004012
4013/**
4014 * xmlParsePITarget:
4015 * @ctxt: an XML parser context
4016 *
4017 * parse the name of a PI
4018 *
4019 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4020 *
4021 * Returns the PITarget name or NULL
4022 */
4023
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004024const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004025xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004026 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004027
4028 name = xmlParseName(ctxt);
4029 if ((name != NULL) &&
4030 ((name[0] == 'x') || (name[0] == 'X')) &&
4031 ((name[1] == 'm') || (name[1] == 'M')) &&
4032 ((name[2] == 'l') || (name[2] == 'L'))) {
4033 int i;
4034 if ((name[0] == 'x') && (name[1] == 'm') &&
4035 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004036 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004037 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004038 return(name);
4039 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004040 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 return(name);
4042 }
4043 for (i = 0;;i++) {
4044 if (xmlW3CPIs[i] == NULL) break;
4045 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4046 return(name);
4047 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004048 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4049 "xmlParsePITarget: invalid name prefix 'xml'\n",
4050 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004051 }
4052 return(name);
4053}
4054
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is extracted from @catalog and added to the catalogs
 * of the parsing context, in order to be used if there is a resolution
 * need further down in the document. Syntax errors raise a warning,
 * except a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Expect: S? 'catalog' S? '=' S? quoted-URL S? */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    for (p++; IS_BLANK_CH(*p); p++)
        ;

    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* Nothing but blanks may follow the closing quote. */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4116
Owen Taylor3473f882001-02-23 17:55:21 +00004117/**
4118 * xmlParsePI:
4119 * @ctxt: an XML parser context
4120 *
4121 * parse an XML Processing Instruction.
4122 *
4123 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4124 *
4125 * The processing is transfered to SAX once parsed.
4126 */
4127
4128void
4129xmlParsePI(xmlParserCtxtPtr ctxt) {
4130 xmlChar *buf = NULL;
4131 int len = 0;
4132 int size = XML_PARSER_BUFFER_SIZE;
4133 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004134 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004135 xmlParserInputState state;
4136 int count = 0;
4137
4138 if ((RAW == '<') && (NXT(1) == '?')) {
4139 xmlParserInputPtr input = ctxt->input;
4140 state = ctxt->instate;
4141 ctxt->instate = XML_PARSER_PI;
4142 /*
4143 * this is a Processing Instruction.
4144 */
4145 SKIP(2);
4146 SHRINK;
4147
4148 /*
4149 * Parse the target name and check for special support like
4150 * namespace.
4151 */
4152 target = xmlParsePITarget(ctxt);
4153 if (target != NULL) {
4154 if ((RAW == '?') && (NXT(1) == '>')) {
4155 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004156 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4157 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004158 }
4159 SKIP(2);
4160
4161 /*
4162 * SAX: PI detected.
4163 */
4164 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4165 (ctxt->sax->processingInstruction != NULL))
4166 ctxt->sax->processingInstruction(ctxt->userData,
4167 target, NULL);
4168 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004169 return;
4170 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004171 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004172 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004173 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004174 ctxt->instate = state;
4175 return;
4176 }
4177 cur = CUR;
4178 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004179 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4180 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004181 }
4182 SKIP_BLANKS;
4183 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004184 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004185 ((cur != '?') || (NXT(1) != '>'))) {
4186 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004187 xmlChar *tmp;
4188
Owen Taylor3473f882001-02-23 17:55:21 +00004189 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004190 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4191 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004192 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004193 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004194 ctxt->instate = state;
4195 return;
4196 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004197 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004198 }
4199 count++;
4200 if (count > 50) {
4201 GROW;
4202 count = 0;
4203 }
4204 COPY_BUF(l,buf,len,cur);
4205 NEXTL(l);
4206 cur = CUR_CHAR(l);
4207 if (cur == 0) {
4208 SHRINK;
4209 GROW;
4210 cur = CUR_CHAR(l);
4211 }
4212 }
4213 buf[len] = 0;
4214 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004215 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4216 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004217 } else {
4218 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004219 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4220 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004221 }
4222 SKIP(2);
4223
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004224#ifdef LIBXML_CATALOG_ENABLED
4225 if (((state == XML_PARSER_MISC) ||
4226 (state == XML_PARSER_START)) &&
4227 (xmlStrEqual(target, XML_CATALOG_PI))) {
4228 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4229 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4230 (allow == XML_CATA_ALLOW_ALL))
4231 xmlParseCatalogPI(ctxt, buf);
4232 }
4233#endif
4234
4235
Owen Taylor3473f882001-02-23 17:55:21 +00004236 /*
4237 * SAX: PI detected.
4238 */
4239 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4240 (ctxt->sax->processingInstruction != NULL))
4241 ctxt->sax->processingInstruction(ctxt->userData,
4242 target, buf);
4243 }
4244 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004245 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 }
4248 ctxt->instate = state;
4249 }
4250}
4251
4252/**
4253 * xmlParseNotationDecl:
4254 * @ctxt: an XML parser context
4255 *
4256 * parse a notation declaration
4257 *
4258 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4259 *
4260 * Hence there is actually 3 choices:
4261 * 'PUBLIC' S PubidLiteral
4262 * 'PUBLIC' S PubidLiteral S SystemLiteral
4263 * and 'SYSTEM' S SystemLiteral
4264 *
4265 * See the NOTE on xmlParseExternalID().
4266 */
4267
4268void
4269xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004270 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004271 xmlChar *Pubid;
4272 xmlChar *Systemid;
4273
Daniel Veillarda07050d2003-10-19 14:46:32 +00004274 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004275 xmlParserInputPtr input = ctxt->input;
4276 SHRINK;
4277 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004278 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004279 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4280 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004281 return;
4282 }
4283 SKIP_BLANKS;
4284
Daniel Veillard76d66f42001-05-16 21:05:17 +00004285 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004286 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 return;
4289 }
William M. Brack76e95df2003-10-18 16:20:14 +00004290 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004291 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004292 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004293 return;
4294 }
4295 SKIP_BLANKS;
4296
4297 /*
4298 * Parse the IDs.
4299 */
4300 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4301 SKIP_BLANKS;
4302
4303 if (RAW == '>') {
4304 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4306 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004307 }
4308 NEXT;
4309 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4310 (ctxt->sax->notationDecl != NULL))
4311 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4312 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004313 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004314 }
Owen Taylor3473f882001-02-23 17:55:21 +00004315 if (Systemid != NULL) xmlFree(Systemid);
4316 if (Pubid != NULL) xmlFree(Pubid);
4317 }
4318}
4319
4320/**
4321 * xmlParseEntityDecl:
4322 * @ctxt: an XML parser context
4323 *
4324 * parse <!ENTITY declarations
4325 *
4326 * [70] EntityDecl ::= GEDecl | PEDecl
4327 *
4328 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4329 *
4330 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4331 *
4332 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4333 *
4334 * [74] PEDef ::= EntityValue | ExternalID
4335 *
4336 * [76] NDataDecl ::= S 'NDATA' S Name
4337 *
4338 * [ VC: Notation Declared ]
4339 * The Name must match the declared name of a notation.
4340 */
4341
4342void
4343xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004344 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004345 xmlChar *value = NULL;
4346 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004347 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004348 int isParameter = 0;
4349 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004350 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004351
Daniel Veillard4c778d82005-01-23 17:37:44 +00004352 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004353 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004354 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004355 SHRINK;
4356 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004357 skipped = SKIP_BLANKS;
4358 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4360 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004361 }
Owen Taylor3473f882001-02-23 17:55:21 +00004362
4363 if (RAW == '%') {
4364 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004365 skipped = SKIP_BLANKS;
4366 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4368 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004369 }
Owen Taylor3473f882001-02-23 17:55:21 +00004370 isParameter = 1;
4371 }
4372
Daniel Veillard76d66f42001-05-16 21:05:17 +00004373 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004374 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004375 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4376 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004377 return;
4378 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004379 skipped = SKIP_BLANKS;
4380 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004381 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4382 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004383 }
Owen Taylor3473f882001-02-23 17:55:21 +00004384
Daniel Veillardf5582f12002-06-11 10:08:16 +00004385 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004386 /*
4387 * handle the various case of definitions...
4388 */
4389 if (isParameter) {
4390 if ((RAW == '"') || (RAW == '\'')) {
4391 value = xmlParseEntityValue(ctxt, &orig);
4392 if (value) {
4393 if ((ctxt->sax != NULL) &&
4394 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4395 ctxt->sax->entityDecl(ctxt->userData, name,
4396 XML_INTERNAL_PARAMETER_ENTITY,
4397 NULL, NULL, value);
4398 }
4399 } else {
4400 URI = xmlParseExternalID(ctxt, &literal, 1);
4401 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004402 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004403 }
4404 if (URI) {
4405 xmlURIPtr uri;
4406
4407 uri = xmlParseURI((const char *) URI);
4408 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004409 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4410 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004411 /*
4412 * This really ought to be a well formedness error
4413 * but the XML Core WG decided otherwise c.f. issue
4414 * E26 of the XML erratas.
4415 */
Owen Taylor3473f882001-02-23 17:55:21 +00004416 } else {
4417 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004418 /*
4419 * Okay this is foolish to block those but not
4420 * invalid URIs.
4421 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004422 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004423 } else {
4424 if ((ctxt->sax != NULL) &&
4425 (!ctxt->disableSAX) &&
4426 (ctxt->sax->entityDecl != NULL))
4427 ctxt->sax->entityDecl(ctxt->userData, name,
4428 XML_EXTERNAL_PARAMETER_ENTITY,
4429 literal, URI, NULL);
4430 }
4431 xmlFreeURI(uri);
4432 }
4433 }
4434 }
4435 } else {
4436 if ((RAW == '"') || (RAW == '\'')) {
4437 value = xmlParseEntityValue(ctxt, &orig);
4438 if ((ctxt->sax != NULL) &&
4439 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4440 ctxt->sax->entityDecl(ctxt->userData, name,
4441 XML_INTERNAL_GENERAL_ENTITY,
4442 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004443 /*
4444 * For expat compatibility in SAX mode.
4445 */
4446 if ((ctxt->myDoc == NULL) ||
4447 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4448 if (ctxt->myDoc == NULL) {
4449 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4450 }
4451 if (ctxt->myDoc->intSubset == NULL)
4452 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4453 BAD_CAST "fake", NULL, NULL);
4454
Daniel Veillard1af9a412003-08-20 22:54:39 +00004455 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4456 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004457 }
Owen Taylor3473f882001-02-23 17:55:21 +00004458 } else {
4459 URI = xmlParseExternalID(ctxt, &literal, 1);
4460 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004461 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 }
4463 if (URI) {
4464 xmlURIPtr uri;
4465
4466 uri = xmlParseURI((const char *)URI);
4467 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004468 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4469 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004470 /*
4471 * This really ought to be a well formedness error
4472 * but the XML Core WG decided otherwise c.f. issue
4473 * E26 of the XML erratas.
4474 */
Owen Taylor3473f882001-02-23 17:55:21 +00004475 } else {
4476 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004477 /*
4478 * Okay this is foolish to block those but not
4479 * invalid URIs.
4480 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004481 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004482 }
4483 xmlFreeURI(uri);
4484 }
4485 }
William M. Brack76e95df2003-10-18 16:20:14 +00004486 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4488 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004489 }
4490 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004491 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004492 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004493 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4495 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004496 }
4497 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004498 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004499 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4500 (ctxt->sax->unparsedEntityDecl != NULL))
4501 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4502 literal, URI, ndata);
4503 } else {
4504 if ((ctxt->sax != NULL) &&
4505 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4506 ctxt->sax->entityDecl(ctxt->userData, name,
4507 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4508 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004509 /*
4510 * For expat compatibility in SAX mode.
4511 * assuming the entity repalcement was asked for
4512 */
4513 if ((ctxt->replaceEntities != 0) &&
4514 ((ctxt->myDoc == NULL) ||
4515 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4516 if (ctxt->myDoc == NULL) {
4517 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4518 }
4519
4520 if (ctxt->myDoc->intSubset == NULL)
4521 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4522 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004523 xmlSAX2EntityDecl(ctxt, name,
4524 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4525 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004526 }
Owen Taylor3473f882001-02-23 17:55:21 +00004527 }
4528 }
4529 }
4530 SKIP_BLANKS;
4531 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004533 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004534 } else {
4535 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004536 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4537 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004538 }
4539 NEXT;
4540 }
4541 if (orig != NULL) {
4542 /*
4543 * Ugly mechanism to save the raw entity value.
4544 */
4545 xmlEntityPtr cur = NULL;
4546
4547 if (isParameter) {
4548 if ((ctxt->sax != NULL) &&
4549 (ctxt->sax->getParameterEntity != NULL))
4550 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4551 } else {
4552 if ((ctxt->sax != NULL) &&
4553 (ctxt->sax->getEntity != NULL))
4554 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004555 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004556 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004557 }
Owen Taylor3473f882001-02-23 17:55:21 +00004558 }
4559 if (cur != NULL) {
4560 if (cur->orig != NULL)
4561 xmlFree(orig);
4562 else
4563 cur->orig = orig;
4564 } else
4565 xmlFree(orig);
4566 }
Owen Taylor3473f882001-02-23 17:55:21 +00004567 if (value != NULL) xmlFree(value);
4568 if (URI != NULL) xmlFree(URI);
4569 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004570 }
4571}
4572
4573/**
4574 * xmlParseDefaultDecl:
4575 * @ctxt: an XML parser context
4576 * @value: Receive a possible fixed default value for the attribute
4577 *
4578 * Parse an attribute default declaration
4579 *
4580 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4581 *
4582 * [ VC: Required Attribute ]
4583 * if the default declaration is the keyword #REQUIRED, then the
4584 * attribute must be specified for all elements of the type in the
4585 * attribute-list declaration.
4586 *
4587 * [ VC: Attribute Default Legal ]
4588 * The declared default value must meet the lexical constraints of
4589 * the declared attribute type c.f. xmlValidateAttributeDecl()
4590 *
4591 * [ VC: Fixed Attribute Default ]
4592 * if an attribute has a default value declared with the #FIXED
4593 * keyword, instances of that attribute must match the default value.
4594 *
4595 * [ WFC: No < in Attribute Values ]
4596 * handled in xmlParseAttValue()
4597 *
4598 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4599 * or XML_ATTRIBUTE_FIXED.
4600 */
4601
4602int
4603xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4604 int val;
4605 xmlChar *ret;
4606
4607 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004608 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004609 SKIP(9);
4610 return(XML_ATTRIBUTE_REQUIRED);
4611 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004612 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004613 SKIP(8);
4614 return(XML_ATTRIBUTE_IMPLIED);
4615 }
4616 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004617 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004618 SKIP(6);
4619 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004620 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004621 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4622 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004623 }
4624 SKIP_BLANKS;
4625 }
4626 ret = xmlParseAttValue(ctxt);
4627 ctxt->instate = XML_PARSER_DTD;
4628 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004629 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004630 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 } else
4632 *value = ret;
4633 return(val);
4634}
4635
4636/**
4637 * xmlParseNotationType:
4638 * @ctxt: an XML parser context
4639 *
4640 * parse an Notation attribute type.
4641 *
4642 * Note: the leading 'NOTATION' S part has already being parsed...
4643 *
4644 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4645 *
4646 * [ VC: Notation Attributes ]
4647 * Values of this type must match one of the notation names included
4648 * in the declaration; all notation names in the declaration must be declared.
4649 *
4650 * Returns: the notation attribute tree built while parsing
4651 */
4652
4653xmlEnumerationPtr
4654xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004655 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4657
4658 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004659 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004660 return(NULL);
4661 }
4662 SHRINK;
4663 do {
4664 NEXT;
4665 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004666 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004667 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004668 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4669 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004670 return(ret);
4671 }
4672 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004673 if (cur == NULL) return(ret);
4674 if (last == NULL) ret = last = cur;
4675 else {
4676 last->next = cur;
4677 last = cur;
4678 }
4679 SKIP_BLANKS;
4680 } while (RAW == '|');
4681 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004682 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 if ((last != NULL) && (last != ret))
4684 xmlFreeEnumeration(last);
4685 return(ret);
4686 }
4687 NEXT;
4688 return(ret);
4689}
4690
4691/**
4692 * xmlParseEnumerationType:
4693 * @ctxt: an XML parser context
4694 *
4695 * parse an Enumeration attribute type.
4696 *
4697 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4698 *
4699 * [ VC: Enumeration ]
4700 * Values of this type must match one of the Nmtoken tokens in
4701 * the declaration
4702 *
4703 * Returns: the enumeration attribute tree built while parsing
4704 */
4705
4706xmlEnumerationPtr
4707xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4708 xmlChar *name;
4709 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4710
4711 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004712 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004713 return(NULL);
4714 }
4715 SHRINK;
4716 do {
4717 NEXT;
4718 SKIP_BLANKS;
4719 name = xmlParseNmtoken(ctxt);
4720 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004721 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004722 return(ret);
4723 }
4724 cur = xmlCreateEnumeration(name);
4725 xmlFree(name);
4726 if (cur == NULL) return(ret);
4727 if (last == NULL) ret = last = cur;
4728 else {
4729 last->next = cur;
4730 last = cur;
4731 }
4732 SKIP_BLANKS;
4733 } while (RAW == '|');
4734 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004735 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004736 return(ret);
4737 }
4738 NEXT;
4739 return(ret);
4740}
4741
4742/**
4743 * xmlParseEnumeratedType:
4744 * @ctxt: an XML parser context
4745 * @tree: the enumeration tree built while parsing
4746 *
4747 * parse an Enumerated attribute type.
4748 *
4749 * [57] EnumeratedType ::= NotationType | Enumeration
4750 *
4751 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4752 *
4753 *
4754 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4755 */
4756
4757int
4758xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004759 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004760 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004761 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4763 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004764 return(0);
4765 }
4766 SKIP_BLANKS;
4767 *tree = xmlParseNotationType(ctxt);
4768 if (*tree == NULL) return(0);
4769 return(XML_ATTRIBUTE_NOTATION);
4770 }
4771 *tree = xmlParseEnumerationType(ctxt);
4772 if (*tree == NULL) return(0);
4773 return(XML_ATTRIBUTE_ENUMERATION);
4774}
4775
4776/**
4777 * xmlParseAttributeType:
4778 * @ctxt: an XML parser context
4779 * @tree: the enumeration tree built while parsing
4780 *
4781 * parse the Attribute list def for an element
4782 *
4783 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4784 *
4785 * [55] StringType ::= 'CDATA'
4786 *
4787 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4788 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4789 *
4790 * Validity constraints for attribute values syntax are checked in
4791 * xmlValidateAttributeValue()
4792 *
4793 * [ VC: ID ]
4794 * Values of type ID must match the Name production. A name must not
4795 * appear more than once in an XML document as a value of this type;
4796 * i.e., ID values must uniquely identify the elements which bear them.
4797 *
4798 * [ VC: One ID per Element Type ]
4799 * No element type may have more than one ID attribute specified.
4800 *
4801 * [ VC: ID Attribute Default ]
4802 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4803 *
4804 * [ VC: IDREF ]
4805 * Values of type IDREF must match the Name production, and values
4806 * of type IDREFS must match Names; each IDREF Name must match the value
4807 * of an ID attribute on some element in the XML document; i.e. IDREF
4808 * values must match the value of some ID attribute.
4809 *
4810 * [ VC: Entity Name ]
4811 * Values of type ENTITY must match the Name production, values
4812 * of type ENTITIES must match Names; each Entity Name must match the
4813 * name of an unparsed entity declared in the DTD.
4814 *
4815 * [ VC: Name Token ]
4816 * Values of type NMTOKEN must match the Nmtoken production; values
4817 * of type NMTOKENS must match Nmtokens.
4818 *
4819 * Returns the attribute type
4820 */
4821int
4822xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4823 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004824 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004825 SKIP(5);
4826 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004827 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004828 SKIP(6);
4829 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004830 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004831 SKIP(5);
4832 return(XML_ATTRIBUTE_IDREF);
4833 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4834 SKIP(2);
4835 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004836 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004837 SKIP(6);
4838 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004839 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004840 SKIP(8);
4841 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004842 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004843 SKIP(8);
4844 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004845 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004846 SKIP(7);
4847 return(XML_ATTRIBUTE_NMTOKEN);
4848 }
4849 return(xmlParseEnumeratedType(ctxt, tree));
4850}
4851
4852/**
4853 * xmlParseAttributeListDecl:
4854 * @ctxt: an XML parser context
4855 *
4856 * : parse the Attribute list def for an element
4857 *
4858 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4859 *
4860 * [53] AttDef ::= S Name S AttType S DefaultDecl
4861 *
4862 */
4863void
4864xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004865 const xmlChar *elemName;
4866 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004867 xmlEnumerationPtr tree;
4868
Daniel Veillarda07050d2003-10-19 14:46:32 +00004869 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004870 xmlParserInputPtr input = ctxt->input;
4871
4872 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004873 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004874 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004875 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004878 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004880 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4881 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004882 return;
4883 }
4884 SKIP_BLANKS;
4885 GROW;
4886 while (RAW != '>') {
4887 const xmlChar *check = CUR_PTR;
4888 int type;
4889 int def;
4890 xmlChar *defaultValue = NULL;
4891
4892 GROW;
4893 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004894 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004895 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4897 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004898 break;
4899 }
4900 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004901 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004902 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004903 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004904 break;
4905 }
4906 SKIP_BLANKS;
4907
4908 type = xmlParseAttributeType(ctxt, &tree);
4909 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004910 break;
4911 }
4912
4913 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004914 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4916 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 if (tree != NULL)
4918 xmlFreeEnumeration(tree);
4919 break;
4920 }
4921 SKIP_BLANKS;
4922
4923 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4924 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004925 if (defaultValue != NULL)
4926 xmlFree(defaultValue);
4927 if (tree != NULL)
4928 xmlFreeEnumeration(tree);
4929 break;
4930 }
4931
4932 GROW;
4933 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004934 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004936 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004937 if (defaultValue != NULL)
4938 xmlFree(defaultValue);
4939 if (tree != NULL)
4940 xmlFreeEnumeration(tree);
4941 break;
4942 }
4943 SKIP_BLANKS;
4944 }
4945 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004946 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4947 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 if (defaultValue != NULL)
4949 xmlFree(defaultValue);
4950 if (tree != NULL)
4951 xmlFreeEnumeration(tree);
4952 break;
4953 }
4954 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4955 (ctxt->sax->attributeDecl != NULL))
4956 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4957 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004958 else if (tree != NULL)
4959 xmlFreeEnumeration(tree);
4960
4961 if ((ctxt->sax2) && (defaultValue != NULL) &&
4962 (def != XML_ATTRIBUTE_IMPLIED) &&
4963 (def != XML_ATTRIBUTE_REQUIRED)) {
4964 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4965 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004966 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4967 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4968 }
Owen Taylor3473f882001-02-23 17:55:21 +00004969 if (defaultValue != NULL)
4970 xmlFree(defaultValue);
4971 GROW;
4972 }
4973 if (RAW == '>') {
4974 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004975 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4976 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004977 }
4978 NEXT;
4979 }
Owen Taylor3473f882001-02-23 17:55:21 +00004980 }
4981}
4982
4983/**
4984 * xmlParseElementMixedContentDecl:
4985 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004986 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004987 *
4988 * parse the declaration for a Mixed Element content
4989 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4990 *
4991 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4992 * '(' S? '#PCDATA' S? ')'
4993 *
4994 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4995 *
4996 * [ VC: No Duplicate Types ]
4997 * The same name must not appear more than once in a single
4998 * mixed-content declaration.
4999 *
5000 * returns: the list of the xmlElementContentPtr describing the element choices
5001 */
5002xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005003xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005004 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005005 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005006
5007 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005008 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005009 SKIP(7);
5010 SKIP_BLANKS;
5011 SHRINK;
5012 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005013 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005014 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5015"Element content declaration doesn't start and stop in the same entity\n",
5016 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005017 }
Owen Taylor3473f882001-02-23 17:55:21 +00005018 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005019 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005020 if (RAW == '*') {
5021 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5022 NEXT;
5023 }
5024 return(ret);
5025 }
5026 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005027 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005028 if (ret == NULL) return(NULL);
5029 }
5030 while (RAW == '|') {
5031 NEXT;
5032 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005033 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 if (ret == NULL) return(NULL);
5035 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005036 if (cur != NULL)
5037 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005038 cur = ret;
5039 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005040 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005041 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005042 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005043 if (n->c1 != NULL)
5044 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005046 if (n != NULL)
5047 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005048 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
5050 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005051 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005053 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005054 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005055 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005056 return(NULL);
5057 }
5058 SKIP_BLANKS;
5059 GROW;
5060 }
5061 if ((RAW == ')') && (NXT(1) == '*')) {
5062 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005063 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005064 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005065 if (cur->c2 != NULL)
5066 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 }
5068 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005069 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005070 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5071"Element content declaration doesn't start and stop in the same entity\n",
5072 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005073 }
Owen Taylor3473f882001-02-23 17:55:21 +00005074 SKIP(2);
5075 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005076 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005077 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005078 return(NULL);
5079 }
5080
5081 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005082 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005083 }
5084 return(ret);
5085}
5086
5087/**
5088 * xmlParseElementChildrenContentDecl:
5089 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005090 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005091 *
5092 * parse the declaration for a Mixed Element content
5093 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5094 *
5095 *
5096 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5097 *
5098 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5099 *
5100 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5101 *
5102 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5103 *
5104 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5105 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005106 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005107 * opening or closing parentheses in a choice, seq, or Mixed
5108 * construct is contained in the replacement text for a parameter
5109 * entity, both must be contained in the same replacement text. For
5110 * interoperability, if a parameter-entity reference appears in a
5111 * choice, seq, or Mixed construct, its replacement text should not
5112 * be empty, and neither the first nor last non-blank character of
5113 * the replacement text should be a connector (| or ,).
5114 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005115 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005116 * hierarchy.
5117 */
5118xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005119xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005120 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005121 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005122 xmlChar type = 0;
5123
5124 SKIP_BLANKS;
5125 GROW;
5126 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005127 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005128
Owen Taylor3473f882001-02-23 17:55:21 +00005129 /* Recurse on first child */
5130 NEXT;
5131 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005132 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 SKIP_BLANKS;
5134 GROW;
5135 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005136 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005137 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005138 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 return(NULL);
5140 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005141 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005142 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005143 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005144 return(NULL);
5145 }
Owen Taylor3473f882001-02-23 17:55:21 +00005146 GROW;
5147 if (RAW == '?') {
5148 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5149 NEXT;
5150 } else if (RAW == '*') {
5151 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5152 NEXT;
5153 } else if (RAW == '+') {
5154 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5155 NEXT;
5156 } else {
5157 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 GROW;
5160 }
5161 SKIP_BLANKS;
5162 SHRINK;
5163 while (RAW != ')') {
5164 /*
5165 * Each loop we parse one separator and one element.
5166 */
5167 if (RAW == ',') {
5168 if (type == 0) type = CUR;
5169
5170 /*
5171 * Detect "Name | Name , Name" error
5172 */
5173 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005174 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005175 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005176 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005177 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005178 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005179 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005180 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005181 return(NULL);
5182 }
5183 NEXT;
5184
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005185 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005187 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005188 xmlFreeDocElementContent(ctxt->myDoc, last);
5189 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 return(NULL);
5191 }
5192 if (last == NULL) {
5193 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005194 if (ret != NULL)
5195 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005196 ret = cur = op;
5197 } else {
5198 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005199 if (op != NULL)
5200 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005201 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005202 if (last != NULL)
5203 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 cur =op;
5205 last = NULL;
5206 }
5207 } else if (RAW == '|') {
5208 if (type == 0) type = CUR;
5209
5210 /*
5211 * Detect "Name , Name | Name" error
5212 */
5213 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005214 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005215 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005216 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005217 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005218 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005220 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 return(NULL);
5222 }
5223 NEXT;
5224
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005225 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005227 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005228 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005230 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 return(NULL);
5232 }
5233 if (last == NULL) {
5234 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005235 if (ret != NULL)
5236 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005237 ret = cur = op;
5238 } else {
5239 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005240 if (op != NULL)
5241 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005242 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005243 if (last != NULL)
5244 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005245 cur =op;
5246 last = NULL;
5247 }
5248 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005249 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005251 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005252 return(NULL);
5253 }
5254 GROW;
5255 SKIP_BLANKS;
5256 GROW;
5257 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005258 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005259 /* Recurse on second child */
5260 NEXT;
5261 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005262 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005263 SKIP_BLANKS;
5264 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005265 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005267 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005269 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return(NULL);
5271 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005272 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005273 if (RAW == '?') {
5274 last->ocur = XML_ELEMENT_CONTENT_OPT;
5275 NEXT;
5276 } else if (RAW == '*') {
5277 last->ocur = XML_ELEMENT_CONTENT_MULT;
5278 NEXT;
5279 } else if (RAW == '+') {
5280 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5281 NEXT;
5282 } else {
5283 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5284 }
5285 }
5286 SKIP_BLANKS;
5287 GROW;
5288 }
5289 if ((cur != NULL) && (last != NULL)) {
5290 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005291 if (last != NULL)
5292 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005293 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005294 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005295 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5296"Element content declaration doesn't start and stop in the same entity\n",
5297 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005298 }
Owen Taylor3473f882001-02-23 17:55:21 +00005299 NEXT;
5300 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005301 if (ret != NULL) {
5302 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5303 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5304 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5305 else
5306 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5307 }
Owen Taylor3473f882001-02-23 17:55:21 +00005308 NEXT;
5309 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005310 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005311 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005312 cur = ret;
5313 /*
5314 * Some normalization:
5315 * (a | b* | c?)* == (a | b | c)*
5316 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005317 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005318 if ((cur->c1 != NULL) &&
5319 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5320 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5321 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5322 if ((cur->c2 != NULL) &&
5323 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5324 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5325 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5326 cur = cur->c2;
5327 }
5328 }
Owen Taylor3473f882001-02-23 17:55:21 +00005329 NEXT;
5330 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005331 if (ret != NULL) {
5332 int found = 0;
5333
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005334 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5335 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5336 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005337 else
5338 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005339 /*
5340 * Some normalization:
5341 * (a | b*)+ == (a | b)*
5342 * (a | b?)+ == (a | b)*
5343 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005344 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005345 if ((cur->c1 != NULL) &&
5346 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5347 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5348 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5349 found = 1;
5350 }
5351 if ((cur->c2 != NULL) &&
5352 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5353 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5354 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5355 found = 1;
5356 }
5357 cur = cur->c2;
5358 }
5359 if (found)
5360 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5361 }
Owen Taylor3473f882001-02-23 17:55:21 +00005362 NEXT;
5363 }
5364 return(ret);
5365}
5366
5367/**
5368 * xmlParseElementContentDecl:
5369 * @ctxt: an XML parser context
5370 * @name: the name of the element being defined.
5371 * @result: the Element Content pointer will be stored here if any
5372 *
5373 * parse the declaration for an Element content either Mixed or Children,
5374 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5375 *
5376 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5377 *
5378 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5379 */
5380
5381int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005382xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005383 xmlElementContentPtr *result) {
5384
5385 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005386 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005387 int res;
5388
5389 *result = NULL;
5390
5391 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005392 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005393 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005394 return(-1);
5395 }
5396 NEXT;
5397 GROW;
5398 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005399 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005400 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005401 res = XML_ELEMENT_TYPE_MIXED;
5402 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005403 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005404 res = XML_ELEMENT_TYPE_ELEMENT;
5405 }
Owen Taylor3473f882001-02-23 17:55:21 +00005406 SKIP_BLANKS;
5407 *result = tree;
5408 return(res);
5409}
5410
5411/**
5412 * xmlParseElementDecl:
5413 * @ctxt: an XML parser context
5414 *
5415 * parse an Element declaration.
5416 *
5417 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5418 *
5419 * [ VC: Unique Element Type Declaration ]
5420 * No element type may be declared more than once
5421 *
5422 * Returns the type of the element, or -1 in case of error
5423 */
5424int
5425xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005426 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005427 int ret = -1;
5428 xmlElementContentPtr content = NULL;
5429
Daniel Veillard4c778d82005-01-23 17:37:44 +00005430 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005431 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005432 xmlParserInputPtr input = ctxt->input;
5433
5434 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005435 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005438 }
5439 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005440 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005441 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005442 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5443 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005444 return(-1);
5445 }
5446 while ((RAW == 0) && (ctxt->inputNr > 1))
5447 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005448 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5450 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005451 }
5452 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005453 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005454 SKIP(5);
5455 /*
5456 * Element must always be empty.
5457 */
5458 ret = XML_ELEMENT_TYPE_EMPTY;
5459 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5460 (NXT(2) == 'Y')) {
5461 SKIP(3);
5462 /*
5463 * Element is a generic container.
5464 */
5465 ret = XML_ELEMENT_TYPE_ANY;
5466 } else if (RAW == '(') {
5467 ret = xmlParseElementContentDecl(ctxt, name, &content);
5468 } else {
5469 /*
5470 * [ WFC: PEs in Internal Subset ] error handling.
5471 */
5472 if ((RAW == '%') && (ctxt->external == 0) &&
5473 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005474 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005475 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005476 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005477 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005478 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5479 }
Owen Taylor3473f882001-02-23 17:55:21 +00005480 return(-1);
5481 }
5482
5483 SKIP_BLANKS;
5484 /*
5485 * Pop-up of finished entities.
5486 */
5487 while ((RAW == 0) && (ctxt->inputNr > 1))
5488 xmlPopInput(ctxt);
5489 SKIP_BLANKS;
5490
5491 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005492 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005493 if (content != NULL) {
5494 xmlFreeDocElementContent(ctxt->myDoc, content);
5495 }
Owen Taylor3473f882001-02-23 17:55:21 +00005496 } else {
5497 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005498 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5499 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
5501
5502 NEXT;
5503 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005504 (ctxt->sax->elementDecl != NULL)) {
5505 if (content != NULL)
5506 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005507 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5508 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005509 if ((content != NULL) && (content->parent == NULL)) {
5510 /*
5511 * this is a trick: if xmlAddElementDecl is called,
5512 * instead of copying the full tree it is plugged directly
5513 * if called from the parser. Avoid duplicating the
5514 * interfaces or change the API/ABI
5515 */
5516 xmlFreeDocElementContent(ctxt->myDoc, content);
5517 }
5518 } else if (content != NULL) {
5519 xmlFreeDocElementContent(ctxt->myDoc, content);
5520 }
Owen Taylor3473f882001-02-23 17:55:21 +00005521 }
Owen Taylor3473f882001-02-23 17:55:21 +00005522 }
5523 return(ret);
5524}
5525
5526/**
Owen Taylor3473f882001-02-23 17:55:21 +00005527 * xmlParseConditionalSections
5528 * @ctxt: an XML parser context
5529 *
5530 * [61] conditionalSect ::= includeSect | ignoreSect
5531 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5532 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5533 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5534 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5535 */
5536
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005537static void
Owen Taylor3473f882001-02-23 17:55:21 +00005538xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5539 SKIP(3);
5540 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005541 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005542 SKIP(7);
5543 SKIP_BLANKS;
5544 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005545 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005546 } else {
5547 NEXT;
5548 }
5549 if (xmlParserDebugEntities) {
5550 if ((ctxt->input != NULL) && (ctxt->input->filename))
5551 xmlGenericError(xmlGenericErrorContext,
5552 "%s(%d): ", ctxt->input->filename,
5553 ctxt->input->line);
5554 xmlGenericError(xmlGenericErrorContext,
5555 "Entering INCLUDE Conditional Section\n");
5556 }
5557
5558 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5559 (NXT(2) != '>'))) {
5560 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005561 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005562
5563 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5564 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005565 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005566 NEXT;
5567 } else if (RAW == '%') {
5568 xmlParsePEReference(ctxt);
5569 } else
5570 xmlParseMarkupDecl(ctxt);
5571
5572 /*
5573 * Pop-up of finished entities.
5574 */
5575 while ((RAW == 0) && (ctxt->inputNr > 1))
5576 xmlPopInput(ctxt);
5577
Daniel Veillardfdc91562002-07-01 21:52:03 +00005578 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005579 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 break;
5581 }
5582 }
5583 if (xmlParserDebugEntities) {
5584 if ((ctxt->input != NULL) && (ctxt->input->filename))
5585 xmlGenericError(xmlGenericErrorContext,
5586 "%s(%d): ", ctxt->input->filename,
5587 ctxt->input->line);
5588 xmlGenericError(xmlGenericErrorContext,
5589 "Leaving INCLUDE Conditional Section\n");
5590 }
5591
Daniel Veillarda07050d2003-10-19 14:46:32 +00005592 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005593 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005594 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005595 int depth = 0;
5596
5597 SKIP(6);
5598 SKIP_BLANKS;
5599 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005600 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005601 } else {
5602 NEXT;
5603 }
5604 if (xmlParserDebugEntities) {
5605 if ((ctxt->input != NULL) && (ctxt->input->filename))
5606 xmlGenericError(xmlGenericErrorContext,
5607 "%s(%d): ", ctxt->input->filename,
5608 ctxt->input->line);
5609 xmlGenericError(xmlGenericErrorContext,
5610 "Entering IGNORE Conditional Section\n");
5611 }
5612
5613 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005614 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005615 * But disable SAX event generating DTD building in the meantime
5616 */
5617 state = ctxt->disableSAX;
5618 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005619 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005620 ctxt->instate = XML_PARSER_IGNORE;
5621
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005622 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005623 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5624 depth++;
5625 SKIP(3);
5626 continue;
5627 }
5628 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5629 if (--depth >= 0) SKIP(3);
5630 continue;
5631 }
5632 NEXT;
5633 continue;
5634 }
5635
5636 ctxt->disableSAX = state;
5637 ctxt->instate = instate;
5638
5639 if (xmlParserDebugEntities) {
5640 if ((ctxt->input != NULL) && (ctxt->input->filename))
5641 xmlGenericError(xmlGenericErrorContext,
5642 "%s(%d): ", ctxt->input->filename,
5643 ctxt->input->line);
5644 xmlGenericError(xmlGenericErrorContext,
5645 "Leaving IGNORE Conditional Section\n");
5646 }
5647
5648 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005649 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005650 }
5651
5652 if (RAW == 0)
5653 SHRINK;
5654
5655 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005656 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005657 } else {
5658 SKIP(3);
5659 }
5660}
5661
5662/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005663 * xmlParseMarkupDecl:
5664 * @ctxt: an XML parser context
5665 *
5666 * parse Markup declarations
5667 *
5668 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5669 * NotationDecl | PI | Comment
5670 *
5671 * [ VC: Proper Declaration/PE Nesting ]
5672 * Parameter-entity replacement text must be properly nested with
5673 * markup declarations. That is to say, if either the first character
5674 * or the last character of a markup declaration (markupdecl above) is
5675 * contained in the replacement text for a parameter-entity reference,
5676 * both must be contained in the same replacement text.
5677 *
5678 * [ WFC: PEs in Internal Subset ]
5679 * In the internal DTD subset, parameter-entity references can occur
5680 * only where markup declarations can occur, not within markup declarations.
5681 * (This does not apply to references that occur in external parameter
5682 * entities or to the external subset.)
5683 */
5684void
5685xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5686 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005687 if (CUR == '<') {
5688 if (NXT(1) == '!') {
5689 switch (NXT(2)) {
5690 case 'E':
5691 if (NXT(3) == 'L')
5692 xmlParseElementDecl(ctxt);
5693 else if (NXT(3) == 'N')
5694 xmlParseEntityDecl(ctxt);
5695 break;
5696 case 'A':
5697 xmlParseAttributeListDecl(ctxt);
5698 break;
5699 case 'N':
5700 xmlParseNotationDecl(ctxt);
5701 break;
5702 case '-':
5703 xmlParseComment(ctxt);
5704 break;
5705 default:
5706 /* there is an error but it will be detected later */
5707 break;
5708 }
5709 } else if (NXT(1) == '?') {
5710 xmlParsePI(ctxt);
5711 }
5712 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005713 /*
5714 * This is only for internal subset. On external entities,
5715 * the replacement is done before parsing stage
5716 */
5717 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5718 xmlParsePEReference(ctxt);
5719
5720 /*
5721 * Conditional sections are allowed from entities included
5722 * by PE References in the internal subset.
5723 */
5724 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5725 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5726 xmlParseConditionalSections(ctxt);
5727 }
5728 }
5729
5730 ctxt->instate = XML_PARSER_DTD;
5731}
5732
5733/**
5734 * xmlParseTextDecl:
5735 * @ctxt: an XML parser context
5736 *
5737 * parse an XML declaration header for external entities
5738 *
5739 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5740 *
5741 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5742 */
5743
5744void
5745xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5746 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005747 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005748
5749 /*
5750 * We know that '<?xml' is here.
5751 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005752 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005753 SKIP(5);
5754 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005755 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005756 return;
5757 }
5758
William M. Brack76e95df2003-10-18 16:20:14 +00005759 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5761 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005762 }
5763 SKIP_BLANKS;
5764
5765 /*
5766 * We may have the VersionInfo here.
5767 */
5768 version = xmlParseVersionInfo(ctxt);
5769 if (version == NULL)
5770 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005771 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005772 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005773 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005775 }
5776 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005777 ctxt->input->version = version;
5778
5779 /*
5780 * We must have the encoding declaration
5781 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005782 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005783 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5784 /*
5785 * The XML REC instructs us to stop parsing right here
5786 */
5787 return;
5788 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005789 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5790 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5791 "Missing encoding in text declaration\n");
5792 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005793
5794 SKIP_BLANKS;
5795 if ((RAW == '?') && (NXT(1) == '>')) {
5796 SKIP(2);
5797 } else if (RAW == '>') {
5798 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005799 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005800 NEXT;
5801 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005802 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005803 MOVETO_ENDTAG(CUR_PTR);
5804 NEXT;
5805 }
5806}
5807
5808/**
Owen Taylor3473f882001-02-23 17:55:21 +00005809 * xmlParseExternalSubset:
5810 * @ctxt: an XML parser context
5811 * @ExternalID: the external identifier
5812 * @SystemID: the system identifier (or URL)
5813 *
5814 * parse Markup declarations from an external subset
5815 *
5816 * [30] extSubset ::= textDecl? extSubsetDecl
5817 *
5818 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5819 */
5820void
5821xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5822 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005823 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005824 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005825 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005826 xmlParseTextDecl(ctxt);
5827 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5828 /*
5829 * The XML REC instructs us to stop parsing right here
5830 */
5831 ctxt->instate = XML_PARSER_EOF;
5832 return;
5833 }
5834 }
5835 if (ctxt->myDoc == NULL) {
5836 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5837 }
5838 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5839 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5840
5841 ctxt->instate = XML_PARSER_DTD;
5842 ctxt->external = 1;
5843 while (((RAW == '<') && (NXT(1) == '?')) ||
5844 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005845 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005846 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005847 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005848
5849 GROW;
5850 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5851 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005852 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005853 NEXT;
5854 } else if (RAW == '%') {
5855 xmlParsePEReference(ctxt);
5856 } else
5857 xmlParseMarkupDecl(ctxt);
5858
5859 /*
5860 * Pop-up of finished entities.
5861 */
5862 while ((RAW == 0) && (ctxt->inputNr > 1))
5863 xmlPopInput(ctxt);
5864
Daniel Veillardfdc91562002-07-01 21:52:03 +00005865 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005866 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005867 break;
5868 }
5869 }
5870
5871 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005872 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005873 }
5874
5875}
5876
5877/**
5878 * xmlParseReference:
5879 * @ctxt: an XML parser context
5880 *
5881 * parse and handle entity references in content, depending on the SAX
5882 * interface, this may end-up in a call to character() if this is a
5883 * CharRef, a predefined entity, if there is no reference() callback.
5884 * or if the parser was asked to switch to that mode.
5885 *
5886 * [67] Reference ::= EntityRef | CharRef
5887 */
5888void
5889xmlParseReference(xmlParserCtxtPtr ctxt) {
5890 xmlEntityPtr ent;
5891 xmlChar *val;
5892 if (RAW != '&') return;
5893
5894 if (NXT(1) == '#') {
5895 int i = 0;
5896 xmlChar out[10];
5897 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005898 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005899
5900 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5901 /*
5902 * So we are using non-UTF-8 buffers
5903 * Check that the char fit on 8bits, if not
5904 * generate a CharRef.
5905 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005906 if (value <= 0xFF) {
5907 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005908 out[1] = 0;
5909 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5910 (!ctxt->disableSAX))
5911 ctxt->sax->characters(ctxt->userData, out, 1);
5912 } else {
5913 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005914 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005915 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005916 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005917 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5918 (!ctxt->disableSAX))
5919 ctxt->sax->reference(ctxt->userData, out);
5920 }
5921 } else {
5922 /*
5923 * Just encode the value in UTF-8
5924 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005925 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005926 out[i] = 0;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5928 (!ctxt->disableSAX))
5929 ctxt->sax->characters(ctxt->userData, out, i);
5930 }
5931 } else {
5932 ent = xmlParseEntityRef(ctxt);
5933 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005934 if (!ctxt->wellFormed)
5935 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005936 if ((ent->name != NULL) &&
5937 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5938 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005939 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005940
5941
5942 /*
5943 * The first reference to the entity trigger a parsing phase
5944 * where the ent->children is filled with the result from
5945 * the parsing.
5946 */
5947 if (ent->children == NULL) {
5948 xmlChar *value;
5949 value = ent->content;
5950
5951 /*
5952 * Check that this entity is well formed
5953 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005954 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005955 (value[1] == 0) && (value[0] == '<') &&
5956 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5957 /*
5958 * DONE: get definite answer on this !!!
5959 * Lots of entity decls are used to declare a single
5960 * char
5961 * <!ENTITY lt "<">
5962 * Which seems to be valid since
5963 * 2.4: The ampersand character (&) and the left angle
5964 * bracket (<) may appear in their literal form only
5965 * when used ... They are also legal within the literal
5966 * entity value of an internal entity declaration;i
5967 * see "4.3.2 Well-Formed Parsed Entities".
5968 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5969 * Looking at the OASIS test suite and James Clark
5970 * tests, this is broken. However the XML REC uses
5971 * it. Is the XML REC not well-formed ????
5972 * This is a hack to avoid this problem
5973 *
5974 * ANSWER: since lt gt amp .. are already defined,
5975 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005976 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005977 * is lousy but acceptable.
5978 */
5979 list = xmlNewDocText(ctxt->myDoc, value);
5980 if (list != NULL) {
5981 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5982 (ent->children == NULL)) {
5983 ent->children = list;
5984 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005985 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005986 list->parent = (xmlNodePtr) ent;
5987 } else {
5988 xmlFreeNodeList(list);
5989 }
5990 } else if (list != NULL) {
5991 xmlFreeNodeList(list);
5992 }
5993 } else {
5994 /*
5995 * 4.3.2: An internal general parsed entity is well-formed
5996 * if its replacement text matches the production labeled
5997 * content.
5998 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005999
6000 void *user_data;
6001 /*
6002 * This is a bit hackish but this seems the best
6003 * way to make sure both SAX and DOM entity support
6004 * behaves okay.
6005 */
6006 if (ctxt->userData == ctxt)
6007 user_data = NULL;
6008 else
6009 user_data = ctxt->userData;
6010
Owen Taylor3473f882001-02-23 17:55:21 +00006011 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6012 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006013 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6014 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006015 ctxt->depth--;
6016 } else if (ent->etype ==
6017 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6018 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006019 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006020 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006021 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006022 ctxt->depth--;
6023 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006024 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006025 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6026 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006027 }
6028 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006029 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006030 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006031 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006032 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6033 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006034 (ent->children == NULL)) {
6035 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006036 if (ctxt->replaceEntities) {
6037 /*
6038 * Prune it directly in the generated document
6039 * except for single text nodes.
6040 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006041 if (((list->type == XML_TEXT_NODE) &&
6042 (list->next == NULL)) ||
6043 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006044 list->parent = (xmlNodePtr) ent;
6045 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006046 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006047 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006048 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006049 while (list != NULL) {
6050 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006051 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006052 if (list->next == NULL)
6053 ent->last = list;
6054 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006055 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006056 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006057#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006058 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6059 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006060#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006061 }
6062 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006063 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006064 while (list != NULL) {
6065 list->parent = (xmlNodePtr) ent;
6066 if (list->next == NULL)
6067 ent->last = list;
6068 list = list->next;
6069 }
Owen Taylor3473f882001-02-23 17:55:21 +00006070 }
6071 } else {
6072 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006073 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006074 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006075 } else if ((ret != XML_ERR_OK) &&
6076 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006077 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 } else if (list != NULL) {
6079 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006080 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006081 }
6082 }
6083 }
6084 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6085 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6086 /*
6087 * Create a node.
6088 */
6089 ctxt->sax->reference(ctxt->userData, ent->name);
6090 return;
6091 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006092 /*
6093 * There is a problem on the handling of _private for entities
6094 * (bug 155816): Should we copy the content of the field from
6095 * the entity (possibly overwriting some value set by the user
6096 * when a copy is created), should we leave it alone, or should
6097 * we try to take care of different situations? The problem
6098 * is exacerbated by the usage of this field by the xmlReader.
6099 * To fix this bug, we look at _private on the created node
6100 * and, if it's NULL, we copy in whatever was in the entity.
6101 * If it's not NULL we leave it alone. This is somewhat of a
6102 * hack - maybe we should have further tests to determine
6103 * what to do.
6104 */
Owen Taylor3473f882001-02-23 17:55:21 +00006105 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6106 /*
6107 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006108 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006109 * In the first occurrence list contains the replacement.
6110 * progressive == 2 means we are operating on the Reader
6111 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006112 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006113 if (((list == NULL) && (ent->owner == 0)) ||
6114 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006115 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006116
6117 /*
6118 * when operating on a reader, the entities definitions
6119 * are always owning the entities subtree.
6120 if (ctxt->parseMode == XML_PARSE_READER)
6121 ent->owner = 1;
6122 */
6123
Daniel Veillard62f313b2001-07-04 19:49:14 +00006124 cur = ent->children;
6125 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006126 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006127 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006128 if (nw->_private == NULL)
6129 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006130 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006131 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006132 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006133 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006134 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006135 if (cur == ent->last) {
6136 /*
6137 * needed to detect some strange empty
6138 * node cases in the reader tests
6139 */
6140 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006141 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006142 (nw->type == XML_ELEMENT_NODE) &&
6143 (nw->children == NULL))
6144 nw->extra = 1;
6145
Daniel Veillard62f313b2001-07-04 19:49:14 +00006146 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006147 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006148 cur = cur->next;
6149 }
Daniel Veillard81273902003-09-30 00:43:48 +00006150#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006151 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006152 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006153#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006154 } else if (list == NULL) {
6155 xmlNodePtr nw = NULL, cur, next, last,
6156 firstChild = NULL;
6157 /*
6158 * Copy the entity child list and make it the new
6159 * entity child list. The goal is to make sure any
6160 * ID or REF referenced will be the one from the
6161 * document content and not the entity copy.
6162 */
6163 cur = ent->children;
6164 ent->children = NULL;
6165 last = ent->last;
6166 ent->last = NULL;
6167 while (cur != NULL) {
6168 next = cur->next;
6169 cur->next = NULL;
6170 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006171 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006172 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006173 if (nw->_private == NULL)
6174 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006175 if (firstChild == NULL){
6176 firstChild = cur;
6177 }
6178 xmlAddChild((xmlNodePtr) ent, nw);
6179 xmlAddChild(ctxt->node, cur);
6180 }
6181 if (cur == last)
6182 break;
6183 cur = next;
6184 }
6185 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006186#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006187 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6188 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006189#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006190 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006191 const xmlChar *nbktext;
6192
Daniel Veillard62f313b2001-07-04 19:49:14 +00006193 /*
6194 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006195 * node with a possible previous text one which
6196 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006197 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006198 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6199 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006200 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006201 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006202 if ((ent->last != ent->children) &&
6203 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006204 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006205 xmlAddChildList(ctxt->node, ent->children);
6206 }
6207
Owen Taylor3473f882001-02-23 17:55:21 +00006208 /*
6209 * This is to avoid a nasty side effect, see
6210 * characters() in SAX.c
6211 */
6212 ctxt->nodemem = 0;
6213 ctxt->nodelen = 0;
6214 return;
6215 } else {
6216 /*
6217 * Probably running in SAX mode
6218 */
6219 xmlParserInputPtr input;
6220
6221 input = xmlNewEntityInputStream(ctxt, ent);
6222 xmlPushInput(ctxt, input);
6223 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006224 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6225 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006226 xmlParseTextDecl(ctxt);
6227 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6228 /*
6229 * The XML REC instructs us to stop parsing right here
6230 */
6231 ctxt->instate = XML_PARSER_EOF;
6232 return;
6233 }
6234 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006235 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6236 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006237 }
6238 }
6239 return;
6240 }
6241 }
6242 } else {
6243 val = ent->content;
6244 if (val == NULL) return;
6245 /*
6246 * inline the entity.
6247 */
6248 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6249 (!ctxt->disableSAX))
6250 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6251 }
6252 }
6253}
6254
6255/**
6256 * xmlParseEntityRef:
6257 * @ctxt: an XML parser context
6258 *
6259 * parse ENTITY references declarations
6260 *
6261 * [68] EntityRef ::= '&' Name ';'
6262 *
6263 * [ WFC: Entity Declared ]
6264 * In a document without any DTD, a document with only an internal DTD
6265 * subset which contains no parameter entity references, or a document
6266 * with "standalone='yes'", the Name given in the entity reference
6267 * must match that in an entity declaration, except that well-formed
6268 * documents need not declare any of the following entities: amp, lt,
6269 * gt, apos, quot. The declaration of a parameter entity must precede
6270 * any reference to it. Similarly, the declaration of a general entity
6271 * must precede any reference to it which appears in a default value in an
6272 * attribute-list declaration. Note that if entities are declared in the
6273 * external subset or in external parameter entities, a non-validating
6274 * processor is not obligated to read and process their declarations;
6275 * for such documents, the rule that an entity must be declared is a
6276 * well-formedness constraint only if standalone='yes'.
6277 *
6278 * [ WFC: Parsed Entity ]
6279 * An entity reference must not contain the name of an unparsed entity
6280 *
6281 * Returns the xmlEntityPtr if found, or NULL otherwise.
6282 */
6283xmlEntityPtr
6284xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006285 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006286 xmlEntityPtr ent = NULL;
6287
6288 GROW;
6289
6290 if (RAW == '&') {
6291 NEXT;
6292 name = xmlParseName(ctxt);
6293 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6295 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006296 } else {
6297 if (RAW == ';') {
6298 NEXT;
6299 /*
6300 * Ask first SAX for entity resolution, otherwise try the
6301 * predefined set.
6302 */
6303 if (ctxt->sax != NULL) {
6304 if (ctxt->sax->getEntity != NULL)
6305 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006306 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006307 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006308 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6309 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006310 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006311 }
Owen Taylor3473f882001-02-23 17:55:21 +00006312 }
6313 /*
6314 * [ WFC: Entity Declared ]
6315 * In a document without any DTD, a document with only an
6316 * internal DTD subset which contains no parameter entity
6317 * references, or a document with "standalone='yes'", the
6318 * Name given in the entity reference must match that in an
6319 * entity declaration, except that well-formed documents
6320 * need not declare any of the following entities: amp, lt,
6321 * gt, apos, quot.
6322 * The declaration of a parameter entity must precede any
6323 * reference to it.
6324 * Similarly, the declaration of a general entity must
6325 * precede any reference to it which appears in a default
6326 * value in an attribute-list declaration. Note that if
6327 * entities are declared in the external subset or in
6328 * external parameter entities, a non-validating processor
6329 * is not obligated to read and process their declarations;
6330 * for such documents, the rule that an entity must be
6331 * declared is a well-formedness constraint only if
6332 * standalone='yes'.
6333 */
6334 if (ent == NULL) {
6335 if ((ctxt->standalone == 1) ||
6336 ((ctxt->hasExternalSubset == 0) &&
6337 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006338 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006339 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006340 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006341 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006342 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006343 if ((ctxt->inSubset == 0) &&
6344 (ctxt->sax != NULL) &&
6345 (ctxt->sax->reference != NULL)) {
6346 ctxt->sax->reference(ctxt, name);
6347 }
Owen Taylor3473f882001-02-23 17:55:21 +00006348 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006349 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 }
6351
6352 /*
6353 * [ WFC: Parsed Entity ]
6354 * An entity reference must not contain the name of an
6355 * unparsed entity
6356 */
6357 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006358 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006359 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006360 }
6361
6362 /*
6363 * [ WFC: No External Entity References ]
6364 * Attribute values cannot contain direct or indirect
6365 * entity references to external entities.
6366 */
6367 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6368 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006369 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6370 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006371 }
6372 /*
6373 * [ WFC: No < in Attribute Values ]
6374 * The replacement text of any entity referred to directly or
6375 * indirectly in an attribute value (other than "&lt;") must
6376 * not contain a <.
6377 */
6378 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6379 (ent != NULL) &&
6380 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6381 (ent->content != NULL) &&
6382 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006383 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006384 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006385 }
6386
6387 /*
6388 * Internal check, no parameter entities here ...
6389 */
6390 else {
6391 switch (ent->etype) {
6392 case XML_INTERNAL_PARAMETER_ENTITY:
6393 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006394 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6395 "Attempt to reference the parameter entity '%s'\n",
6396 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006397 break;
6398 default:
6399 break;
6400 }
6401 }
6402
6403 /*
6404 * [ WFC: No Recursion ]
6405 * A parsed entity must not contain a recursive reference
6406 * to itself, either directly or indirectly.
6407 * Done somewhere else
6408 */
6409
6410 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006411 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006412 }
Owen Taylor3473f882001-02-23 17:55:21 +00006413 }
6414 }
6415 return(ent);
6416}
6417
6418/**
6419 * xmlParseStringEntityRef:
6420 * @ctxt: an XML parser context
6421 * @str: a pointer to an index in the string
6422 *
6423 * parse ENTITY references declarations, but this version parses it from
6424 * a string value.
6425 *
6426 * [68] EntityRef ::= '&' Name ';'
6427 *
6428 * [ WFC: Entity Declared ]
6429 * In a document without any DTD, a document with only an internal DTD
6430 * subset which contains no parameter entity references, or a document
6431 * with "standalone='yes'", the Name given in the entity reference
6432 * must match that in an entity declaration, except that well-formed
6433 * documents need not declare any of the following entities: amp, lt,
6434 * gt, apos, quot. The declaration of a parameter entity must precede
6435 * any reference to it. Similarly, the declaration of a general entity
6436 * must precede any reference to it which appears in a default value in an
6437 * attribute-list declaration. Note that if entities are declared in the
6438 * external subset or in external parameter entities, a non-validating
6439 * processor is not obligated to read and process their declarations;
6440 * for such documents, the rule that an entity must be declared is a
6441 * well-formedness constraint only if standalone='yes'.
6442 *
6443 * [ WFC: Parsed Entity ]
6444 * An entity reference must not contain the name of an unparsed entity
6445 *
6446 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6447 * is updated to the current location in the string.
6448 */
6449xmlEntityPtr
6450xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6451 xmlChar *name;
6452 const xmlChar *ptr;
6453 xmlChar cur;
6454 xmlEntityPtr ent = NULL;
6455
6456 if ((str == NULL) || (*str == NULL))
6457 return(NULL);
6458 ptr = *str;
6459 cur = *ptr;
6460 if (cur == '&') {
6461 ptr++;
6462 cur = *ptr;
6463 name = xmlParseStringName(ctxt, &ptr);
6464 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006465 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6466 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006467 } else {
6468 if (*ptr == ';') {
6469 ptr++;
6470 /*
6471 * Ask first SAX for entity resolution, otherwise try the
6472 * predefined set.
6473 */
6474 if (ctxt->sax != NULL) {
6475 if (ctxt->sax->getEntity != NULL)
6476 ent = ctxt->sax->getEntity(ctxt->userData, name);
6477 if (ent == NULL)
6478 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006479 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006480 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006481 }
Owen Taylor3473f882001-02-23 17:55:21 +00006482 }
6483 /*
6484 * [ WFC: Entity Declared ]
6485 * In a document without any DTD, a document with only an
6486 * internal DTD subset which contains no parameter entity
6487 * references, or a document with "standalone='yes'", the
6488 * Name given in the entity reference must match that in an
6489 * entity declaration, except that well-formed documents
6490 * need not declare any of the following entities: amp, lt,
6491 * gt, apos, quot.
6492 * The declaration of a parameter entity must precede any
6493 * reference to it.
6494 * Similarly, the declaration of a general entity must
6495 * precede any reference to it which appears in a default
6496 * value in an attribute-list declaration. Note that if
6497 * entities are declared in the external subset or in
6498 * external parameter entities, a non-validating processor
6499 * is not obligated to read and process their declarations;
6500 * for such documents, the rule that an entity must be
6501 * declared is a well-formedness constraint only if
6502 * standalone='yes'.
6503 */
6504 if (ent == NULL) {
6505 if ((ctxt->standalone == 1) ||
6506 ((ctxt->hasExternalSubset == 0) &&
6507 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006508 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006509 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006510 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006511 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006512 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006513 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006514 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006515 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006516 }
6517
6518 /*
6519 * [ WFC: Parsed Entity ]
6520 * An entity reference must not contain the name of an
6521 * unparsed entity
6522 */
6523 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006524 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006525 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006526 }
6527
6528 /*
6529 * [ WFC: No External Entity References ]
6530 * Attribute values cannot contain direct or indirect
6531 * entity references to external entities.
6532 */
6533 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6534 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006535 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006536 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006537 }
6538 /*
6539 * [ WFC: No < in Attribute Values ]
6540 * The replacement text of any entity referred to directly or
6541 * indirectly in an attribute value (other than "&lt;") must
6542 * not contain a <.
6543 */
6544 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6545 (ent != NULL) &&
6546 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6547 (ent->content != NULL) &&
6548 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006549 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6550 "'<' in entity '%s' is not allowed in attributes values\n",
6551 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006552 }
6553
6554 /*
6555 * Internal check, no parameter entities here ...
6556 */
6557 else {
6558 switch (ent->etype) {
6559 case XML_INTERNAL_PARAMETER_ENTITY:
6560 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006561 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6562 "Attempt to reference the parameter entity '%s'\n",
6563 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006564 break;
6565 default:
6566 break;
6567 }
6568 }
6569
6570 /*
6571 * [ WFC: No Recursion ]
6572 * A parsed entity must not contain a recursive reference
6573 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006574 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006575 */
6576
6577 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006578 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 }
6580 xmlFree(name);
6581 }
6582 }
6583 *str = ptr;
6584 return(ent);
6585}
6586
6587/**
6588 * xmlParsePEReference:
6589 * @ctxt: an XML parser context
6590 *
6591 * parse PEReference declarations
6592 * The entity content is handled directly by pushing it's content as
6593 * a new input stream.
6594 *
6595 * [69] PEReference ::= '%' Name ';'
6596 *
6597 * [ WFC: No Recursion ]
6598 * A parsed entity must not contain a recursive
6599 * reference to itself, either directly or indirectly.
6600 *
6601 * [ WFC: Entity Declared ]
6602 * In a document without any DTD, a document with only an internal DTD
6603 * subset which contains no parameter entity references, or a document
6604 * with "standalone='yes'", ... ... The declaration of a parameter
6605 * entity must precede any reference to it...
6606 *
6607 * [ VC: Entity Declared ]
6608 * In a document with an external subset or external parameter entities
6609 * with "standalone='no'", ... ... The declaration of a parameter entity
6610 * must precede any reference to it...
6611 *
6612 * [ WFC: In DTD ]
6613 * Parameter-entity references may only appear in the DTD.
6614 * NOTE: misleading but this is handled.
6615 */
6616void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006617xmlParsePEReference(xmlParserCtxtPtr ctxt)
6618{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006619 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006620 xmlEntityPtr entity = NULL;
6621 xmlParserInputPtr input;
6622
6623 if (RAW == '%') {
6624 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006625 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006626 if (name == NULL) {
6627 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6628 "xmlParsePEReference: no name\n");
6629 } else {
6630 if (RAW == ';') {
6631 NEXT;
6632 if ((ctxt->sax != NULL) &&
6633 (ctxt->sax->getParameterEntity != NULL))
6634 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6635 name);
6636 if (entity == NULL) {
6637 /*
6638 * [ WFC: Entity Declared ]
6639 * In a document without any DTD, a document with only an
6640 * internal DTD subset which contains no parameter entity
6641 * references, or a document with "standalone='yes'", ...
6642 * ... The declaration of a parameter entity must precede
6643 * any reference to it...
6644 */
6645 if ((ctxt->standalone == 1) ||
6646 ((ctxt->hasExternalSubset == 0) &&
6647 (ctxt->hasPErefs == 0))) {
6648 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6649 "PEReference: %%%s; not found\n",
6650 name);
6651 } else {
6652 /*
6653 * [ VC: Entity Declared ]
6654 * In a document with an external subset or external
6655 * parameter entities with "standalone='no'", ...
6656 * ... The declaration of a parameter entity must
6657 * precede any reference to it...
6658 */
6659 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6660 "PEReference: %%%s; not found\n",
6661 name, NULL);
6662 ctxt->valid = 0;
6663 }
6664 } else {
6665 /*
6666 * Internal checking in case the entity quest barfed
6667 */
6668 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6669 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6670 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6671 "Internal: %%%s; is not a parameter entity\n",
6672 name, NULL);
6673 } else if (ctxt->input->free != deallocblankswrapper) {
6674 input =
6675 xmlNewBlanksWrapperInputStream(ctxt, entity);
6676 xmlPushInput(ctxt, input);
6677 } else {
6678 /*
6679 * TODO !!!
6680 * handle the extra spaces added before and after
6681 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6682 */
6683 input = xmlNewEntityInputStream(ctxt, entity);
6684 xmlPushInput(ctxt, input);
6685 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006686 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006687 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006688 xmlParseTextDecl(ctxt);
6689 if (ctxt->errNo ==
6690 XML_ERR_UNSUPPORTED_ENCODING) {
6691 /*
6692 * The XML REC instructs us to stop parsing
6693 * right here
6694 */
6695 ctxt->instate = XML_PARSER_EOF;
6696 return;
6697 }
6698 }
6699 }
6700 }
6701 ctxt->hasPErefs = 1;
6702 } else {
6703 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6704 }
6705 }
Owen Taylor3473f882001-02-23 17:55:21 +00006706 }
6707}
6708
6709/**
6710 * xmlParseStringPEReference:
6711 * @ctxt: an XML parser context
6712 * @str: a pointer to an index in the string
6713 *
6714 * parse PEReference declarations
6715 *
6716 * [69] PEReference ::= '%' Name ';'
6717 *
6718 * [ WFC: No Recursion ]
6719 * A parsed entity must not contain a recursive
6720 * reference to itself, either directly or indirectly.
6721 *
6722 * [ WFC: Entity Declared ]
6723 * In a document without any DTD, a document with only an internal DTD
6724 * subset which contains no parameter entity references, or a document
6725 * with "standalone='yes'", ... ... The declaration of a parameter
6726 * entity must precede any reference to it...
6727 *
6728 * [ VC: Entity Declared ]
6729 * In a document with an external subset or external parameter entities
6730 * with "standalone='no'", ... ... The declaration of a parameter entity
6731 * must precede any reference to it...
6732 *
6733 * [ WFC: In DTD ]
6734 * Parameter-entity references may only appear in the DTD.
6735 * NOTE: misleading but this is handled.
6736 *
6737 * Returns the string of the entity content.
6738 * str is updated to the current value of the index
6739 */
6740xmlEntityPtr
6741xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6742 const xmlChar *ptr;
6743 xmlChar cur;
6744 xmlChar *name;
6745 xmlEntityPtr entity = NULL;
6746
6747 if ((str == NULL) || (*str == NULL)) return(NULL);
6748 ptr = *str;
6749 cur = *ptr;
6750 if (cur == '%') {
6751 ptr++;
6752 cur = *ptr;
6753 name = xmlParseStringName(ctxt, &ptr);
6754 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006755 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6756 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006757 } else {
6758 cur = *ptr;
6759 if (cur == ';') {
6760 ptr++;
6761 cur = *ptr;
6762 if ((ctxt->sax != NULL) &&
6763 (ctxt->sax->getParameterEntity != NULL))
6764 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6765 name);
6766 if (entity == NULL) {
6767 /*
6768 * [ WFC: Entity Declared ]
6769 * In a document without any DTD, a document with only an
6770 * internal DTD subset which contains no parameter entity
6771 * references, or a document with "standalone='yes'", ...
6772 * ... The declaration of a parameter entity must precede
6773 * any reference to it...
6774 */
6775 if ((ctxt->standalone == 1) ||
6776 ((ctxt->hasExternalSubset == 0) &&
6777 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006778 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006779 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006780 } else {
6781 /*
6782 * [ VC: Entity Declared ]
6783 * In a document with an external subset or external
6784 * parameter entities with "standalone='no'", ...
6785 * ... The declaration of a parameter entity must
6786 * precede any reference to it...
6787 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006788 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6789 "PEReference: %%%s; not found\n",
6790 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006791 ctxt->valid = 0;
6792 }
6793 } else {
6794 /*
6795 * Internal checking in case the entity quest barfed
6796 */
6797 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6798 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006799 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6800 "%%%s; is not a parameter entity\n",
6801 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006802 }
6803 }
6804 ctxt->hasPErefs = 1;
6805 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006806 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006807 }
6808 xmlFree(name);
6809 }
6810 }
6811 *str = ptr;
6812 return(entity);
6813}
6814
6815/**
6816 * xmlParseDocTypeDecl:
6817 * @ctxt: an XML parser context
6818 *
6819 * parse a DOCTYPE declaration
6820 *
6821 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6822 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6823 *
6824 * [ VC: Root Element Type ]
6825 * The Name in the document type declaration must match the element
6826 * type of the root element.
6827 */
6828
6829void
6830xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006831 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006832 xmlChar *ExternalID = NULL;
6833 xmlChar *URI = NULL;
6834
6835 /*
6836 * We know that '<!DOCTYPE' has been detected.
6837 */
6838 SKIP(9);
6839
6840 SKIP_BLANKS;
6841
6842 /*
6843 * Parse the DOCTYPE name.
6844 */
6845 name = xmlParseName(ctxt);
6846 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006847 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6848 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006849 }
6850 ctxt->intSubName = name;
6851
6852 SKIP_BLANKS;
6853
6854 /*
6855 * Check for SystemID and ExternalID
6856 */
6857 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6858
6859 if ((URI != NULL) || (ExternalID != NULL)) {
6860 ctxt->hasExternalSubset = 1;
6861 }
6862 ctxt->extSubURI = URI;
6863 ctxt->extSubSystem = ExternalID;
6864
6865 SKIP_BLANKS;
6866
6867 /*
6868 * Create and update the internal subset.
6869 */
6870 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6871 (!ctxt->disableSAX))
6872 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6873
6874 /*
6875 * Is there any internal subset declarations ?
6876 * they are handled separately in xmlParseInternalSubset()
6877 */
6878 if (RAW == '[')
6879 return;
6880
6881 /*
6882 * We should be at the end of the DOCTYPE declaration.
6883 */
6884 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006885 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006886 }
6887 NEXT;
6888}
6889
6890/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006891 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006892 * @ctxt: an XML parser context
6893 *
6894 * parse the internal subset declaration
6895 *
6896 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6897 */
6898
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006899static void
Owen Taylor3473f882001-02-23 17:55:21 +00006900xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6901 /*
6902 * Is there any DTD definition ?
6903 */
6904 if (RAW == '[') {
6905 ctxt->instate = XML_PARSER_DTD;
6906 NEXT;
6907 /*
6908 * Parse the succession of Markup declarations and
6909 * PEReferences.
6910 * Subsequence (markupdecl | PEReference | S)*
6911 */
6912 while (RAW != ']') {
6913 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006914 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006915
6916 SKIP_BLANKS;
6917 xmlParseMarkupDecl(ctxt);
6918 xmlParsePEReference(ctxt);
6919
6920 /*
6921 * Pop-up of finished entities.
6922 */
6923 while ((RAW == 0) && (ctxt->inputNr > 1))
6924 xmlPopInput(ctxt);
6925
6926 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006927 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006928 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006929 break;
6930 }
6931 }
6932 if (RAW == ']') {
6933 NEXT;
6934 SKIP_BLANKS;
6935 }
6936 }
6937
6938 /*
6939 * We should be at the end of the DOCTYPE declaration.
6940 */
6941 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006942 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006943 }
6944 NEXT;
6945}
6946
Daniel Veillard81273902003-09-30 00:43:48 +00006947#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006948/**
6949 * xmlParseAttribute:
6950 * @ctxt: an XML parser context
6951 * @value: a xmlChar ** used to store the value of the attribute
6952 *
6953 * parse an attribute
6954 *
6955 * [41] Attribute ::= Name Eq AttValue
6956 *
6957 * [ WFC: No External Entity References ]
6958 * Attribute values cannot contain direct or indirect entity references
6959 * to external entities.
6960 *
6961 * [ WFC: No < in Attribute Values ]
6962 * The replacement text of any entity referred to directly or indirectly in
6963 * an attribute value (other than "&lt;") must not contain a <.
6964 *
6965 * [ VC: Attribute Value Type ]
6966 * The attribute must have been declared; the value must be of the type
6967 * declared for it.
6968 *
6969 * [25] Eq ::= S? '=' S?
6970 *
6971 * With namespace:
6972 *
6973 * [NS 11] Attribute ::= QName Eq AttValue
6974 *
6975 * Also the case QName == xmlns:??? is handled independently as a namespace
6976 * definition.
6977 *
6978 * Returns the attribute name, and the value in *value.
6979 */
6980
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006981const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006982xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006983 const xmlChar *name;
6984 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006985
6986 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006987 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006988 name = xmlParseName(ctxt);
6989 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006990 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006991 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006992 return(NULL);
6993 }
6994
6995 /*
6996 * read the value
6997 */
6998 SKIP_BLANKS;
6999 if (RAW == '=') {
7000 NEXT;
7001 SKIP_BLANKS;
7002 val = xmlParseAttValue(ctxt);
7003 ctxt->instate = XML_PARSER_CONTENT;
7004 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007005 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007006 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007007 return(NULL);
7008 }
7009
7010 /*
7011 * Check that xml:lang conforms to the specification
7012 * No more registered as an error, just generate a warning now
7013 * since this was deprecated in XML second edition
7014 */
7015 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7016 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007017 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7018 "Malformed value for xml:lang : %s\n",
7019 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007020 }
7021 }
7022
7023 /*
7024 * Check that xml:space conforms to the specification
7025 */
7026 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7027 if (xmlStrEqual(val, BAD_CAST "default"))
7028 *(ctxt->space) = 0;
7029 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7030 *(ctxt->space) = 1;
7031 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007032 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007033"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007034 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 }
7036 }
7037
7038 *value = val;
7039 return(name);
7040}
7041
7042/**
7043 * xmlParseStartTag:
7044 * @ctxt: an XML parser context
7045 *
7046 * parse a start of tag either for rule element or
7047 * EmptyElement. In both case we don't parse the tag closing chars.
7048 *
7049 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7050 *
7051 * [ WFC: Unique Att Spec ]
7052 * No attribute name may appear more than once in the same start-tag or
7053 * empty-element tag.
7054 *
7055 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7056 *
7057 * [ WFC: Unique Att Spec ]
7058 * No attribute name may appear more than once in the same start-tag or
7059 * empty-element tag.
7060 *
7061 * With namespace:
7062 *
7063 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7064 *
7065 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7066 *
7067 * Returns the element name parsed
7068 */
7069
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007070const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007071xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007072 const xmlChar *name;
7073 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007074 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007075 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007076 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007077 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007078 int i;
7079
7080 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007081 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007082
7083 name = xmlParseName(ctxt);
7084 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007086 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007087 return(NULL);
7088 }
7089
7090 /*
7091 * Now parse the attributes, it ends up with the ending
7092 *
7093 * (S Attribute)* S?
7094 */
7095 SKIP_BLANKS;
7096 GROW;
7097
Daniel Veillard21a0f912001-02-25 19:54:14 +00007098 while ((RAW != '>') &&
7099 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007100 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007101 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007102 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007103
7104 attname = xmlParseAttribute(ctxt, &attvalue);
7105 if ((attname != NULL) && (attvalue != NULL)) {
7106 /*
7107 * [ WFC: Unique Att Spec ]
7108 * No attribute name may appear more than once in the same
7109 * start-tag or empty-element tag.
7110 */
7111 for (i = 0; i < nbatts;i += 2) {
7112 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007113 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007114 xmlFree(attvalue);
7115 goto failed;
7116 }
7117 }
Owen Taylor3473f882001-02-23 17:55:21 +00007118 /*
7119 * Add the pair to atts
7120 */
7121 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007122 maxatts = 22; /* allow for 10 attrs by default */
7123 atts = (const xmlChar **)
7124 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007125 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007126 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007127 if (attvalue != NULL)
7128 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007129 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007130 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007131 ctxt->atts = atts;
7132 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007133 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007134 const xmlChar **n;
7135
Owen Taylor3473f882001-02-23 17:55:21 +00007136 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007137 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007138 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007139 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007140 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007141 if (attvalue != NULL)
7142 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007143 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007144 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007145 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007146 ctxt->atts = atts;
7147 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007148 }
7149 atts[nbatts++] = attname;
7150 atts[nbatts++] = attvalue;
7151 atts[nbatts] = NULL;
7152 atts[nbatts + 1] = NULL;
7153 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007154 if (attvalue != NULL)
7155 xmlFree(attvalue);
7156 }
7157
7158failed:
7159
Daniel Veillard3772de32002-12-17 10:31:45 +00007160 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007161 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7162 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007163 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7165 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007166 }
7167 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007168 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7169 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007170 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7171 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007172 break;
7173 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007174 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007175 GROW;
7176 }
7177
7178 /*
7179 * SAX: Start of Element !
7180 */
7181 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007182 (!ctxt->disableSAX)) {
7183 if (nbatts > 0)
7184 ctxt->sax->startElement(ctxt->userData, name, atts);
7185 else
7186 ctxt->sax->startElement(ctxt->userData, name, NULL);
7187 }
Owen Taylor3473f882001-02-23 17:55:21 +00007188
7189 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007190 /* Free only the content strings */
7191 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 if (atts[i] != NULL)
7193 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007194 }
7195 return(name);
7196}
7197
7198/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007199 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007200 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007201 * @line: line of the start tag
7202 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007203 *
7204 * parse an end of tag
7205 *
7206 * [42] ETag ::= '</' Name S? '>'
7207 *
7208 * With namespace
7209 *
7210 * [NS 9] ETag ::= '</' QName S? '>'
7211 */
7212
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007213static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007214xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007215 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007216
7217 GROW;
7218 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007219 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007220 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007221 return;
7222 }
7223 SKIP(2);
7224
Daniel Veillard46de64e2002-05-29 08:21:33 +00007225 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007226
7227 /*
7228 * We should definitely be at the ending "S? '>'" part
7229 */
7230 GROW;
7231 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007232 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007233 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007234 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007235 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007236
7237 /*
7238 * [ WFC: Element Type Match ]
7239 * The Name in an element's end-tag must match the element type in the
7240 * start-tag.
7241 *
7242 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007243 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007244 if (name == NULL) name = BAD_CAST "unparseable";
7245 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007246 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007247 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007248 }
7249
7250 /*
7251 * SAX: End of Tag
7252 */
7253 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7254 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007255 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007256
Daniel Veillarde57ec792003-09-10 10:50:59 +00007257 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007258 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007259 return;
7260}
7261
7262/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007263 * xmlParseEndTag:
7264 * @ctxt: an XML parser context
7265 *
7266 * parse an end of tag
7267 *
7268 * [42] ETag ::= '</' Name S? '>'
7269 *
7270 * With namespace
7271 *
7272 * [NS 9] ETag ::= '</' QName S? '>'
7273 */
7274
7275void
7276xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007277 xmlParseEndTag1(ctxt, 0);
7278}
Daniel Veillard81273902003-09-30 00:43:48 +00007279#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007280
7281/************************************************************************
7282 * *
7283 * SAX 2 specific operations *
7284 * *
7285 ************************************************************************/
7286
7287static const xmlChar *
7288xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7289 int len = 0, l;
7290 int c;
7291 int count = 0;
7292
7293 /*
7294 * Handler for more complex cases
7295 */
7296 GROW;
7297 c = CUR_CHAR(l);
7298 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007299 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007300 return(NULL);
7301 }
7302
7303 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007304 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007305 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007306 (IS_COMBINING(c)) ||
7307 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308 if (count++ > 100) {
7309 count = 0;
7310 GROW;
7311 }
7312 len += l;
7313 NEXTL(l);
7314 c = CUR_CHAR(l);
7315 }
7316 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7317}
7318
7319/*
7320 * xmlGetNamespace:
7321 * @ctxt: an XML parser context
7322 * @prefix: the prefix to lookup
7323 *
7324 * Lookup the namespace name for the @prefix (which ca be NULL)
7325 * The prefix must come from the @ctxt->dict dictionnary
7326 *
7327 * Returns the namespace name or NULL if not bound
7328 */
7329static const xmlChar *
7330xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7331 int i;
7332
Daniel Veillarde57ec792003-09-10 10:50:59 +00007333 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007335 if (ctxt->nsTab[i] == prefix) {
7336 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7337 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007338 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007339 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007340 return(NULL);
7341}
7342
7343/**
7344 * xmlParseNCName:
7345 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007346 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007347 *
7348 * parse an XML name.
7349 *
7350 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7351 * CombiningChar | Extender
7352 *
7353 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7354 *
7355 * Returns the Name parsed or NULL
7356 */
7357
7358static const xmlChar *
7359xmlParseNCName(xmlParserCtxtPtr ctxt) {
7360 const xmlChar *in;
7361 const xmlChar *ret;
7362 int count = 0;
7363
7364 /*
7365 * Accelerator for simple ASCII names
7366 */
7367 in = ctxt->input->cur;
7368 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7369 ((*in >= 0x41) && (*in <= 0x5A)) ||
7370 (*in == '_')) {
7371 in++;
7372 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7373 ((*in >= 0x41) && (*in <= 0x5A)) ||
7374 ((*in >= 0x30) && (*in <= 0x39)) ||
7375 (*in == '_') || (*in == '-') ||
7376 (*in == '.'))
7377 in++;
7378 if ((*in > 0) && (*in < 0x80)) {
7379 count = in - ctxt->input->cur;
7380 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7381 ctxt->input->cur = in;
7382 ctxt->nbChars += count;
7383 ctxt->input->col += count;
7384 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007385 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007386 }
7387 return(ret);
7388 }
7389 }
7390 return(xmlParseNCNameComplex(ctxt));
7391}
7392
7393/**
7394 * xmlParseQName:
7395 * @ctxt: an XML parser context
7396 * @prefix: pointer to store the prefix part
7397 *
7398 * parse an XML Namespace QName
7399 *
7400 * [6] QName ::= (Prefix ':')? LocalPart
7401 * [7] Prefix ::= NCName
7402 * [8] LocalPart ::= NCName
7403 *
7404 * Returns the Name parsed or NULL
7405 */
7406
7407static const xmlChar *
7408xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7409 const xmlChar *l, *p;
7410
7411 GROW;
7412
7413 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007414 if (l == NULL) {
7415 if (CUR == ':') {
7416 l = xmlParseName(ctxt);
7417 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007418 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7419 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007420 *prefix = NULL;
7421 return(l);
7422 }
7423 }
7424 return(NULL);
7425 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 if (CUR == ':') {
7427 NEXT;
7428 p = l;
7429 l = xmlParseNCName(ctxt);
7430 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007431 xmlChar *tmp;
7432
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007433 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7434 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007435 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7436 p = xmlDictLookup(ctxt->dict, tmp, -1);
7437 if (tmp != NULL) xmlFree(tmp);
7438 *prefix = NULL;
7439 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 }
7441 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007442 xmlChar *tmp;
7443
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007444 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7445 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007446 NEXT;
7447 tmp = (xmlChar *) xmlParseName(ctxt);
7448 if (tmp != NULL) {
7449 tmp = xmlBuildQName(tmp, l, NULL, 0);
7450 l = xmlDictLookup(ctxt->dict, tmp, -1);
7451 if (tmp != NULL) xmlFree(tmp);
7452 *prefix = p;
7453 return(l);
7454 }
7455 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7456 l = xmlDictLookup(ctxt->dict, tmp, -1);
7457 if (tmp != NULL) xmlFree(tmp);
7458 *prefix = p;
7459 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007460 }
7461 *prefix = p;
7462 } else
7463 *prefix = NULL;
7464 return(l);
7465}
7466
7467/**
7468 * xmlParseQNameAndCompare:
7469 * @ctxt: an XML parser context
7470 * @name: the localname
7471 * @prefix: the prefix, if any.
7472 *
7473 * parse an XML name and compares for match
7474 * (specialized for endtag parsing)
7475 *
7476 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7477 * and the name for mismatch
7478 */
7479
7480static const xmlChar *
7481xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7482 xmlChar const *prefix) {
7483 const xmlChar *cmp = name;
7484 const xmlChar *in;
7485 const xmlChar *ret;
7486 const xmlChar *prefix2;
7487
7488 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7489
7490 GROW;
7491 in = ctxt->input->cur;
7492
7493 cmp = prefix;
7494 while (*in != 0 && *in == *cmp) {
7495 ++in;
7496 ++cmp;
7497 }
7498 if ((*cmp == 0) && (*in == ':')) {
7499 in++;
7500 cmp = name;
7501 while (*in != 0 && *in == *cmp) {
7502 ++in;
7503 ++cmp;
7504 }
William M. Brack76e95df2003-10-18 16:20:14 +00007505 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 /* success */
7507 ctxt->input->cur = in;
7508 return((const xmlChar*) 1);
7509 }
7510 }
7511 /*
7512 * all strings coms from the dictionary, equality can be done directly
7513 */
7514 ret = xmlParseQName (ctxt, &prefix2);
7515 if ((ret == name) && (prefix == prefix2))
7516 return((const xmlChar*) 1);
7517 return ret;
7518}
7519
7520/**
7521 * xmlParseAttValueInternal:
7522 * @ctxt: an XML parser context
7523 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007524 * @alloc: whether the attribute was reallocated as a new string
7525 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007526 *
7527 * parse a value for an attribute.
7528 * NOTE: if no normalization is needed, the routine will return pointers
7529 * directly from the data buffer.
7530 *
7531 * 3.3.3 Attribute-Value Normalization:
7532 * Before the value of an attribute is passed to the application or
7533 * checked for validity, the XML processor must normalize it as follows:
7534 * - a character reference is processed by appending the referenced
7535 * character to the attribute value
7536 * - an entity reference is processed by recursively processing the
7537 * replacement text of the entity
7538 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7539 * appending #x20 to the normalized value, except that only a single
7540 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7541 * parsed entity or the literal entity value of an internal parsed entity
7542 * - other characters are processed by appending them to the normalized value
7543 * If the declared value is not CDATA, then the XML processor must further
7544 * process the normalized attribute value by discarding any leading and
7545 * trailing space (#x20) characters, and by replacing sequences of space
7546 * (#x20) characters by a single space (#x20) character.
7547 * All attributes for which no declaration has been read should be treated
7548 * by a non-validating parser as if declared CDATA.
7549 *
7550 * Returns the AttValue parsed or NULL. The value has to be freed by the
7551 * caller if it was copied, this can be detected by val[*len] == 0.
7552 */
7553
7554static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007555xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7556 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007557{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007558 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007559 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007560 xmlChar *ret = NULL;
7561
7562 GROW;
7563 in = (xmlChar *) CUR_PTR;
7564 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007565 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007566 return (NULL);
7567 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007568 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007569
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007570 /*
7571 * try to handle in this routine the most common case where no
7572 * allocation of a new string is required and where content is
7573 * pure ASCII.
7574 */
7575 limit = *in++;
7576 end = ctxt->input->end;
7577 start = in;
7578 if (in >= end) {
7579 const xmlChar *oldbase = ctxt->input->base;
7580 GROW;
7581 if (oldbase != ctxt->input->base) {
7582 long delta = ctxt->input->base - oldbase;
7583 start = start + delta;
7584 in = in + delta;
7585 }
7586 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007587 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007588 if (normalize) {
7589 /*
7590 * Skip any leading spaces
7591 */
7592 while ((in < end) && (*in != limit) &&
7593 ((*in == 0x20) || (*in == 0x9) ||
7594 (*in == 0xA) || (*in == 0xD))) {
7595 in++;
7596 start = in;
7597 if (in >= end) {
7598 const xmlChar *oldbase = ctxt->input->base;
7599 GROW;
7600 if (oldbase != ctxt->input->base) {
7601 long delta = ctxt->input->base - oldbase;
7602 start = start + delta;
7603 in = in + delta;
7604 }
7605 end = ctxt->input->end;
7606 }
7607 }
7608 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7609 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7610 if ((*in++ == 0x20) && (*in == 0x20)) break;
7611 if (in >= end) {
7612 const xmlChar *oldbase = ctxt->input->base;
7613 GROW;
7614 if (oldbase != ctxt->input->base) {
7615 long delta = ctxt->input->base - oldbase;
7616 start = start + delta;
7617 in = in + delta;
7618 }
7619 end = ctxt->input->end;
7620 }
7621 }
7622 last = in;
7623 /*
7624 * skip the trailing blanks
7625 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007626 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007627 while ((in < end) && (*in != limit) &&
7628 ((*in == 0x20) || (*in == 0x9) ||
7629 (*in == 0xA) || (*in == 0xD))) {
7630 in++;
7631 if (in >= end) {
7632 const xmlChar *oldbase = ctxt->input->base;
7633 GROW;
7634 if (oldbase != ctxt->input->base) {
7635 long delta = ctxt->input->base - oldbase;
7636 start = start + delta;
7637 in = in + delta;
7638 last = last + delta;
7639 }
7640 end = ctxt->input->end;
7641 }
7642 }
7643 if (*in != limit) goto need_complex;
7644 } else {
7645 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7646 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7647 in++;
7648 if (in >= end) {
7649 const xmlChar *oldbase = ctxt->input->base;
7650 GROW;
7651 if (oldbase != ctxt->input->base) {
7652 long delta = ctxt->input->base - oldbase;
7653 start = start + delta;
7654 in = in + delta;
7655 }
7656 end = ctxt->input->end;
7657 }
7658 }
7659 last = in;
7660 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007662 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007664 *len = last - start;
7665 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007666 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007667 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007668 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007669 }
7670 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007673need_complex:
7674 if (alloc) *alloc = 1;
7675 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007676}
7677
7678/**
7679 * xmlParseAttribute2:
7680 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007681 * @pref: the element prefix
7682 * @elem: the element name
7683 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007684 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007685 * @len: an int * to save the length of the attribute
7686 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007687 *
7688 * parse an attribute in the new SAX2 framework.
7689 *
7690 * Returns the attribute name, and the value in *value, .
7691 */
7692
7693static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007694xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7695 const xmlChar *pref, const xmlChar *elem,
7696 const xmlChar **prefix, xmlChar **value,
7697 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007698 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007699 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007700 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701
7702 *value = NULL;
7703 GROW;
7704 name = xmlParseQName(ctxt, prefix);
7705 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007706 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7707 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 return(NULL);
7709 }
7710
7711 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007712 * get the type if needed
7713 */
7714 if (ctxt->attsSpecial != NULL) {
7715 int type;
7716
7717 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7718 pref, elem, *prefix, name);
7719 if (type != 0) normalize = 1;
7720 }
7721
7722 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 * read the value
7724 */
7725 SKIP_BLANKS;
7726 if (RAW == '=') {
7727 NEXT;
7728 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007729 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007730 ctxt->instate = XML_PARSER_CONTENT;
7731 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007732 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007734 return(NULL);
7735 }
7736
Daniel Veillardd8925572005-06-08 22:34:55 +00007737 if (*prefix == ctxt->str_xml) {
7738 /*
7739 * Check that xml:lang conforms to the specification
7740 * No more registered as an error, just generate a warning now
7741 * since this was deprecated in XML second edition
7742 */
7743 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7744 internal_val = xmlStrndup(val, *len);
7745 if (!xmlCheckLanguageID(internal_val)) {
7746 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7747 "Malformed value for xml:lang : %s\n",
7748 internal_val, NULL);
7749 }
7750 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007751
Daniel Veillardd8925572005-06-08 22:34:55 +00007752 /*
7753 * Check that xml:space conforms to the specification
7754 */
7755 if (xmlStrEqual(name, BAD_CAST "space")) {
7756 internal_val = xmlStrndup(val, *len);
7757 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7758 *(ctxt->space) = 0;
7759 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7760 *(ctxt->space) = 1;
7761 else {
7762 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007763"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007764 internal_val, NULL);
7765 }
7766 }
7767 if (internal_val) {
7768 xmlFree(internal_val);
7769 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007770 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007771
7772 *value = val;
7773 return(name);
7774}
7775
7776/**
7777 * xmlParseStartTag2:
7778 * @ctxt: an XML parser context
7779 *
7780 * parse a start of tag either for rule element or
7781 * EmptyElement. In both case we don't parse the tag closing chars.
7782 * This routine is called when running SAX2 parsing
7783 *
7784 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7785 *
7786 * [ WFC: Unique Att Spec ]
7787 * No attribute name may appear more than once in the same start-tag or
7788 * empty-element tag.
7789 *
7790 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7791 *
7792 * [ WFC: Unique Att Spec ]
7793 * No attribute name may appear more than once in the same start-tag or
7794 * empty-element tag.
7795 *
7796 * With namespace:
7797 *
7798 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7799 *
7800 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7801 *
7802 * Returns the element name parsed
7803 */
7804
7805static const xmlChar *
7806xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007807 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 const xmlChar *localname;
7809 const xmlChar *prefix;
7810 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007811 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 const xmlChar *nsname;
7813 xmlChar *attvalue;
7814 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007816 int nratts, nbatts, nbdef;
7817 int i, j, nbNs, attval;
7818 const xmlChar *base;
7819 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007820 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821
7822 if (RAW != '<') return(NULL);
7823 NEXT1;
7824
7825 /*
7826 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7827 * point since the attribute values may be stored as pointers to
7828 * the buffer and calling SHRINK would destroy them !
7829 * The Shrinking is only possible once the full set of attribute
7830 * callbacks have been done.
7831 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007832reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007833 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007834 base = ctxt->input->base;
7835 cur = ctxt->input->cur - ctxt->input->base;
7836 nbatts = 0;
7837 nratts = 0;
7838 nbdef = 0;
7839 nbNs = 0;
7840 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007841 /* Forget any namespaces added during an earlier parse of this element. */
7842 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007843
7844 localname = xmlParseQName(ctxt, &prefix);
7845 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007846 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7847 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007848 return(NULL);
7849 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007850 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007851
7852 /*
7853 * Now parse the attributes, it ends up with the ending
7854 *
7855 * (S Attribute)* S?
7856 */
7857 SKIP_BLANKS;
7858 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007859 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007860
7861 while ((RAW != '>') &&
7862 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007863 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007864 const xmlChar *q = CUR_PTR;
7865 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007866 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007867
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007868 attname = xmlParseAttribute2(ctxt, prefix, localname,
7869 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007870 if ((attname != NULL) && (attvalue != NULL)) {
7871 if (len < 0) len = xmlStrlen(attvalue);
7872 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007873 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7874 xmlURIPtr uri;
7875
7876 if (*URL != 0) {
7877 uri = xmlParseURI((const char *) URL);
7878 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007879 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7880 "xmlns: %s not a valid URI\n",
7881 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007882 } else {
7883 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007884 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7885 "xmlns: URI %s is not absolute\n",
7886 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007887 }
7888 xmlFreeURI(uri);
7889 }
7890 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007891 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007892 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007893 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007894 for (j = 1;j <= nbNs;j++)
7895 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7896 break;
7897 if (j <= nbNs)
7898 xmlErrAttributeDup(ctxt, NULL, attname);
7899 else
7900 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007901 if (alloc != 0) xmlFree(attvalue);
7902 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007903 continue;
7904 }
7905 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007906 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7907 xmlURIPtr uri;
7908
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007909 if (attname == ctxt->str_xml) {
7910 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007911 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7912 "xml namespace prefix mapped to wrong URI\n",
7913 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007914 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007915 /*
7916 * Do not keep a namespace definition node
7917 */
7918 if (alloc != 0) xmlFree(attvalue);
7919 SKIP_BLANKS;
7920 continue;
7921 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007922 uri = xmlParseURI((const char *) URL);
7923 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007924 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7925 "xmlns:%s: '%s' is not a valid URI\n",
7926 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007927 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007928 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007929 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7930 "xmlns:%s: URI %s is not absolute\n",
7931 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007932 }
7933 xmlFreeURI(uri);
7934 }
7935
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007937 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007938 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007939 for (j = 1;j <= nbNs;j++)
7940 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7941 break;
7942 if (j <= nbNs)
7943 xmlErrAttributeDup(ctxt, aprefix, attname);
7944 else
7945 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007946 if (alloc != 0) xmlFree(attvalue);
7947 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007948 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007949 continue;
7950 }
7951
7952 /*
7953 * Add the pair to atts
7954 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007955 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7956 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007957 if (attvalue[len] == 0)
7958 xmlFree(attvalue);
7959 goto failed;
7960 }
7961 maxatts = ctxt->maxatts;
7962 atts = ctxt->atts;
7963 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007964 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007965 atts[nbatts++] = attname;
7966 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007967 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 atts[nbatts++] = attvalue;
7969 attvalue += len;
7970 atts[nbatts++] = attvalue;
7971 /*
7972 * tag if some deallocation is needed
7973 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007974 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007975 } else {
7976 if ((attvalue != NULL) && (attvalue[len] == 0))
7977 xmlFree(attvalue);
7978 }
7979
7980failed:
7981
7982 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007983 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7985 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007986 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007987 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7988 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007989 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990 }
7991 SKIP_BLANKS;
7992 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7993 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007994 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007995 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007996 break;
7997 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007998 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007999 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008000 }
8001
Daniel Veillard0fb18932003-09-07 09:14:37 +00008002 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008003 * The attributes defaulting
8004 */
8005 if (ctxt->attsDefault != NULL) {
8006 xmlDefAttrsPtr defaults;
8007
8008 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8009 if (defaults != NULL) {
8010 for (i = 0;i < defaults->nbAttrs;i++) {
8011 attname = defaults->values[4 * i];
8012 aprefix = defaults->values[4 * i + 1];
8013
8014 /*
8015 * special work for namespaces defaulted defs
8016 */
8017 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8018 /*
8019 * check that it's not a defined namespace
8020 */
8021 for (j = 1;j <= nbNs;j++)
8022 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8023 break;
8024 if (j <= nbNs) continue;
8025
8026 nsname = xmlGetNamespace(ctxt, NULL);
8027 if (nsname != defaults->values[4 * i + 2]) {
8028 if (nsPush(ctxt, NULL,
8029 defaults->values[4 * i + 2]) > 0)
8030 nbNs++;
8031 }
8032 } else if (aprefix == ctxt->str_xmlns) {
8033 /*
8034 * check that it's not a defined namespace
8035 */
8036 for (j = 1;j <= nbNs;j++)
8037 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8038 break;
8039 if (j <= nbNs) continue;
8040
8041 nsname = xmlGetNamespace(ctxt, attname);
8042 if (nsname != defaults->values[2]) {
8043 if (nsPush(ctxt, attname,
8044 defaults->values[4 * i + 2]) > 0)
8045 nbNs++;
8046 }
8047 } else {
8048 /*
8049 * check that it's not a defined attribute
8050 */
8051 for (j = 0;j < nbatts;j+=5) {
8052 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8053 break;
8054 }
8055 if (j < nbatts) continue;
8056
8057 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8058 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008059 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008060 }
8061 maxatts = ctxt->maxatts;
8062 atts = ctxt->atts;
8063 }
8064 atts[nbatts++] = attname;
8065 atts[nbatts++] = aprefix;
8066 if (aprefix == NULL)
8067 atts[nbatts++] = NULL;
8068 else
8069 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8070 atts[nbatts++] = defaults->values[4 * i + 2];
8071 atts[nbatts++] = defaults->values[4 * i + 3];
8072 nbdef++;
8073 }
8074 }
8075 }
8076 }
8077
Daniel Veillarde70c8772003-11-25 07:21:18 +00008078 /*
8079 * The attributes checkings
8080 */
8081 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008082 /*
8083 * The default namespace does not apply to attribute names.
8084 */
8085 if (atts[i + 1] != NULL) {
8086 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8087 if (nsname == NULL) {
8088 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8089 "Namespace prefix %s for %s on %s is not defined\n",
8090 atts[i + 1], atts[i], localname);
8091 }
8092 atts[i + 2] = nsname;
8093 } else
8094 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008095 /*
8096 * [ WFC: Unique Att Spec ]
8097 * No attribute name may appear more than once in the same
8098 * start-tag or empty-element tag.
8099 * As extended by the Namespace in XML REC.
8100 */
8101 for (j = 0; j < i;j += 5) {
8102 if (atts[i] == atts[j]) {
8103 if (atts[i+1] == atts[j+1]) {
8104 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8105 break;
8106 }
8107 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8108 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8109 "Namespaced Attribute %s in '%s' redefined\n",
8110 atts[i], nsname, NULL);
8111 break;
8112 }
8113 }
8114 }
8115 }
8116
Daniel Veillarde57ec792003-09-10 10:50:59 +00008117 nsname = xmlGetNamespace(ctxt, prefix);
8118 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008119 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8120 "Namespace prefix %s on %s is not defined\n",
8121 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008122 }
8123 *pref = prefix;
8124 *URI = nsname;
8125
8126 /*
8127 * SAX: Start of Element !
8128 */
8129 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8130 (!ctxt->disableSAX)) {
8131 if (nbNs > 0)
8132 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8133 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8134 nbatts / 5, nbdef, atts);
8135 else
8136 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8137 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8138 }
8139
8140 /*
8141 * Free up attribute allocated strings if needed
8142 */
8143 if (attval != 0) {
8144 for (i = 3,j = 0; j < nratts;i += 5,j++)
8145 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8146 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008147 }
8148
8149 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008150
8151base_changed:
8152 /*
8153 * the attribute strings are valid iif the base didn't changed
8154 */
8155 if (attval != 0) {
8156 for (i = 3,j = 0; j < nratts;i += 5,j++)
8157 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8158 xmlFree((xmlChar *) atts[i]);
8159 }
8160 ctxt->input->cur = ctxt->input->base + cur;
8161 if (ctxt->wellFormed == 1) {
8162 goto reparse;
8163 }
8164 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008165}
8166
8167/**
8168 * xmlParseEndTag2:
8169 * @ctxt: an XML parser context
8170 * @line: line of the start tag
8171 * @nsNr: number of namespaces on the start tag
8172 *
8173 * parse an end of tag
8174 *
8175 * [42] ETag ::= '</' Name S? '>'
8176 *
8177 * With namespace
8178 *
8179 * [NS 9] ETag ::= '</' QName S? '>'
8180 */
8181
8182static void
8183xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008184 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008185 const xmlChar *name;
8186
8187 GROW;
8188 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008189 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008190 return;
8191 }
8192 SKIP(2);
8193
William M. Brack13dfa872004-09-18 04:52:08 +00008194 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008195 if (ctxt->input->cur[tlen] == '>') {
8196 ctxt->input->cur += tlen + 1;
8197 goto done;
8198 }
8199 ctxt->input->cur += tlen;
8200 name = (xmlChar*)1;
8201 } else {
8202 if (prefix == NULL)
8203 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8204 else
8205 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8206 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207
8208 /*
8209 * We should definitely be at the ending "S? '>'" part
8210 */
8211 GROW;
8212 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008213 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008214 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008215 } else
8216 NEXT1;
8217
8218 /*
8219 * [ WFC: Element Type Match ]
8220 * The Name in an element's end-tag must match the element type in the
8221 * start-tag.
8222 *
8223 */
8224 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008225 if (name == NULL) name = BAD_CAST "unparseable";
8226 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008227 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008228 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008229 }
8230
8231 /*
8232 * SAX: End of Tag
8233 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008234done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008235 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8236 (!ctxt->disableSAX))
8237 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8238
Daniel Veillard0fb18932003-09-07 09:14:37 +00008239 spacePop(ctxt);
8240 if (nsNr != 0)
8241 nsPop(ctxt, nsNr);
8242 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008243}
8244
8245/**
Owen Taylor3473f882001-02-23 17:55:21 +00008246 * xmlParseCDSect:
8247 * @ctxt: an XML parser context
8248 *
8249 * Parse escaped pure raw content.
8250 *
8251 * [18] CDSect ::= CDStart CData CDEnd
8252 *
8253 * [19] CDStart ::= '<![CDATA['
8254 *
8255 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8256 *
8257 * [21] CDEnd ::= ']]>'
8258 */
8259void
8260xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8261 xmlChar *buf = NULL;
8262 int len = 0;
8263 int size = XML_PARSER_BUFFER_SIZE;
8264 int r, rl;
8265 int s, sl;
8266 int cur, l;
8267 int count = 0;
8268
Daniel Veillard8f597c32003-10-06 08:19:27 +00008269 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008270 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008271 SKIP(9);
8272 } else
8273 return;
8274
8275 ctxt->instate = XML_PARSER_CDATA_SECTION;
8276 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008277 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008278 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008279 ctxt->instate = XML_PARSER_CONTENT;
8280 return;
8281 }
8282 NEXTL(rl);
8283 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008284 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008285 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008286 ctxt->instate = XML_PARSER_CONTENT;
8287 return;
8288 }
8289 NEXTL(sl);
8290 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008291 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008292 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008293 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008294 return;
8295 }
William M. Brack871611b2003-10-18 04:53:14 +00008296 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008297 ((r != ']') || (s != ']') || (cur != '>'))) {
8298 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008299 xmlChar *tmp;
8300
Owen Taylor3473f882001-02-23 17:55:21 +00008301 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008302 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8303 if (tmp == NULL) {
8304 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008305 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008306 return;
8307 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008308 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008309 }
8310 COPY_BUF(rl,buf,len,r);
8311 r = s;
8312 rl = sl;
8313 s = cur;
8314 sl = l;
8315 count++;
8316 if (count > 50) {
8317 GROW;
8318 count = 0;
8319 }
8320 NEXTL(l);
8321 cur = CUR_CHAR(l);
8322 }
8323 buf[len] = 0;
8324 ctxt->instate = XML_PARSER_CONTENT;
8325 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008326 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008327 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008328 xmlFree(buf);
8329 return;
8330 }
8331 NEXTL(l);
8332
8333 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008334 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008335 */
8336 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8337 if (ctxt->sax->cdataBlock != NULL)
8338 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008339 else if (ctxt->sax->characters != NULL)
8340 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008341 }
8342 xmlFree(buf);
8343}
8344
8345/**
8346 * xmlParseContent:
8347 * @ctxt: an XML parser context
8348 *
8349 * Parse a content:
8350 *
8351 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8352 */
8353
8354void
8355xmlParseContent(xmlParserCtxtPtr ctxt) {
8356 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008357 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008358 ((RAW != '<') || (NXT(1) != '/'))) {
8359 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008360 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008361 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008362
8363 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008364 * First case : a Processing Instruction.
8365 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008366 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008367 xmlParsePI(ctxt);
8368 }
8369
8370 /*
8371 * Second case : a CDSection
8372 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008373 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008374 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008375 xmlParseCDSect(ctxt);
8376 }
8377
8378 /*
8379 * Third case : a comment
8380 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008381 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008382 (NXT(2) == '-') && (NXT(3) == '-')) {
8383 xmlParseComment(ctxt);
8384 ctxt->instate = XML_PARSER_CONTENT;
8385 }
8386
8387 /*
8388 * Fourth case : a sub-element.
8389 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008390 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008391 xmlParseElement(ctxt);
8392 }
8393
8394 /*
8395 * Fifth case : a reference. If if has not been resolved,
8396 * parsing returns it's Name, create the node
8397 */
8398
Daniel Veillard21a0f912001-02-25 19:54:14 +00008399 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008400 xmlParseReference(ctxt);
8401 }
8402
8403 /*
8404 * Last case, text. Note that References are handled directly.
8405 */
8406 else {
8407 xmlParseCharData(ctxt, 0);
8408 }
8409
8410 GROW;
8411 /*
8412 * Pop-up of finished entities.
8413 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008414 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008415 xmlPopInput(ctxt);
8416 SHRINK;
8417
Daniel Veillardfdc91562002-07-01 21:52:03 +00008418 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008419 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8420 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008421 ctxt->instate = XML_PARSER_EOF;
8422 break;
8423 }
8424 }
8425}
8426
8427/**
8428 * xmlParseElement:
8429 * @ctxt: an XML parser context
8430 *
8431 * parse an XML element, this is highly recursive
8432 *
8433 * [39] element ::= EmptyElemTag | STag content ETag
8434 *
8435 * [ WFC: Element Type Match ]
8436 * The Name in an element's end-tag must match the element type in the
8437 * start-tag.
8438 *
Owen Taylor3473f882001-02-23 17:55:21 +00008439 */
8440
8441void
8442xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008443 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008444 const xmlChar *prefix;
8445 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008446 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008447 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008448 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008449 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008450
8451 /* Capture start position */
8452 if (ctxt->record_info) {
8453 node_info.begin_pos = ctxt->input->consumed +
8454 (CUR_PTR - ctxt->input->base);
8455 node_info.begin_line = ctxt->input->line;
8456 }
8457
8458 if (ctxt->spaceNr == 0)
8459 spacePush(ctxt, -1);
8460 else
8461 spacePush(ctxt, *ctxt->space);
8462
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008463 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008464#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008465 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008466#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008467 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008468#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008469 else
8470 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008471#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008472 if (name == NULL) {
8473 spacePop(ctxt);
8474 return;
8475 }
8476 namePush(ctxt, name);
8477 ret = ctxt->node;
8478
Daniel Veillard4432df22003-09-28 18:58:27 +00008479#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008480 /*
8481 * [ VC: Root Element Type ]
8482 * The Name in the document type declaration must match the element
8483 * type of the root element.
8484 */
8485 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8486 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8487 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008488#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008489
8490 /*
8491 * Check for an Empty Element.
8492 */
8493 if ((RAW == '/') && (NXT(1) == '>')) {
8494 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008495 if (ctxt->sax2) {
8496 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8497 (!ctxt->disableSAX))
8498 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008499#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008500 } else {
8501 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8502 (!ctxt->disableSAX))
8503 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008504#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008505 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008506 namePop(ctxt);
8507 spacePop(ctxt);
8508 if (nsNr != ctxt->nsNr)
8509 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008510 if ( ret != NULL && ctxt->record_info ) {
8511 node_info.end_pos = ctxt->input->consumed +
8512 (CUR_PTR - ctxt->input->base);
8513 node_info.end_line = ctxt->input->line;
8514 node_info.node = ret;
8515 xmlParserAddNodeInfo(ctxt, &node_info);
8516 }
8517 return;
8518 }
8519 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008520 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008521 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008522 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8523 "Couldn't find end of Start Tag %s line %d\n",
8524 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008525
8526 /*
8527 * end of parsing of this node.
8528 */
8529 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008530 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008531 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008532 if (nsNr != ctxt->nsNr)
8533 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008534
8535 /*
8536 * Capture end position and add node
8537 */
8538 if ( ret != NULL && ctxt->record_info ) {
8539 node_info.end_pos = ctxt->input->consumed +
8540 (CUR_PTR - ctxt->input->base);
8541 node_info.end_line = ctxt->input->line;
8542 node_info.node = ret;
8543 xmlParserAddNodeInfo(ctxt, &node_info);
8544 }
8545 return;
8546 }
8547
8548 /*
8549 * Parse the content of the element:
8550 */
8551 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008552 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008553 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008554 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008555 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008556
8557 /*
8558 * end of parsing of this node.
8559 */
8560 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008561 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008562 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008563 if (nsNr != ctxt->nsNr)
8564 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008565 return;
8566 }
8567
8568 /*
8569 * parse the end of tag: '</' should be here.
8570 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008571 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008572 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008573 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008574 }
8575#ifdef LIBXML_SAX1_ENABLED
8576 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008578#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008579
8580 /*
8581 * Capture end position and add node
8582 */
8583 if ( ret != NULL && ctxt->record_info ) {
8584 node_info.end_pos = ctxt->input->consumed +
8585 (CUR_PTR - ctxt->input->base);
8586 node_info.end_line = ctxt->input->line;
8587 node_info.node = ret;
8588 xmlParserAddNodeInfo(ctxt, &node_info);
8589 }
8590}
8591
8592/**
8593 * xmlParseVersionNum:
8594 * @ctxt: an XML parser context
8595 *
8596 * parse the XML version value.
8597 *
8598 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8599 *
8600 * Returns the string giving the XML version number, or NULL
8601 */
8602xmlChar *
8603xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8604 xmlChar *buf = NULL;
8605 int len = 0;
8606 int size = 10;
8607 xmlChar cur;
8608
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008609 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008610 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008611 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008612 return(NULL);
8613 }
8614 cur = CUR;
8615 while (((cur >= 'a') && (cur <= 'z')) ||
8616 ((cur >= 'A') && (cur <= 'Z')) ||
8617 ((cur >= '0') && (cur <= '9')) ||
8618 (cur == '_') || (cur == '.') ||
8619 (cur == ':') || (cur == '-')) {
8620 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008621 xmlChar *tmp;
8622
Owen Taylor3473f882001-02-23 17:55:21 +00008623 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008624 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8625 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008626 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 return(NULL);
8628 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008629 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008630 }
8631 buf[len++] = cur;
8632 NEXT;
8633 cur=CUR;
8634 }
8635 buf[len] = 0;
8636 return(buf);
8637}
8638
8639/**
8640 * xmlParseVersionInfo:
8641 * @ctxt: an XML parser context
8642 *
8643 * parse the XML version.
8644 *
8645 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8646 *
8647 * [25] Eq ::= S? '=' S?
8648 *
8649 * Returns the version string, e.g. "1.0"
8650 */
8651
8652xmlChar *
8653xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8654 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008655
Daniel Veillarda07050d2003-10-19 14:46:32 +00008656 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008657 SKIP(7);
8658 SKIP_BLANKS;
8659 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008660 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008661 return(NULL);
8662 }
8663 NEXT;
8664 SKIP_BLANKS;
8665 if (RAW == '"') {
8666 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008667 version = xmlParseVersionNum(ctxt);
8668 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008669 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008670 } else
8671 NEXT;
8672 } else if (RAW == '\''){
8673 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008674 version = xmlParseVersionNum(ctxt);
8675 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008676 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008677 } else
8678 NEXT;
8679 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008680 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008681 }
8682 }
8683 return(version);
8684}
8685
8686/**
8687 * xmlParseEncName:
8688 * @ctxt: an XML parser context
8689 *
8690 * parse the XML encoding name
8691 *
8692 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8693 *
8694 * Returns the encoding name value or NULL
8695 */
8696xmlChar *
8697xmlParseEncName(xmlParserCtxtPtr ctxt) {
8698 xmlChar *buf = NULL;
8699 int len = 0;
8700 int size = 10;
8701 xmlChar cur;
8702
8703 cur = CUR;
8704 if (((cur >= 'a') && (cur <= 'z')) ||
8705 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008706 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008707 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008708 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008709 return(NULL);
8710 }
8711
8712 buf[len++] = cur;
8713 NEXT;
8714 cur = CUR;
8715 while (((cur >= 'a') && (cur <= 'z')) ||
8716 ((cur >= 'A') && (cur <= 'Z')) ||
8717 ((cur >= '0') && (cur <= '9')) ||
8718 (cur == '.') || (cur == '_') ||
8719 (cur == '-')) {
8720 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008721 xmlChar *tmp;
8722
Owen Taylor3473f882001-02-23 17:55:21 +00008723 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008724 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8725 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008726 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008727 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008728 return(NULL);
8729 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008730 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008731 }
8732 buf[len++] = cur;
8733 NEXT;
8734 cur = CUR;
8735 if (cur == 0) {
8736 SHRINK;
8737 GROW;
8738 cur = CUR;
8739 }
8740 }
8741 buf[len] = 0;
8742 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008743 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008744 }
8745 return(buf);
8746}
8747
8748/**
8749 * xmlParseEncodingDecl:
8750 * @ctxt: an XML parser context
8751 *
8752 * parse the XML encoding declaration
8753 *
8754 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8755 *
8756 * this setups the conversion filters.
8757 *
8758 * Returns the encoding value or NULL
8759 */
8760
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008761const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008762xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8763 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008764
8765 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008766 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008767 SKIP(8);
8768 SKIP_BLANKS;
8769 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008770 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008771 return(NULL);
8772 }
8773 NEXT;
8774 SKIP_BLANKS;
8775 if (RAW == '"') {
8776 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008777 encoding = xmlParseEncName(ctxt);
8778 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008779 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008780 } else
8781 NEXT;
8782 } else if (RAW == '\''){
8783 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008784 encoding = xmlParseEncName(ctxt);
8785 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008786 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008787 } else
8788 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008789 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008790 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008791 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008792 /*
8793 * UTF-16 encoding stwich has already taken place at this stage,
8794 * more over the little-endian/big-endian selection is already done
8795 */
8796 if ((encoding != NULL) &&
8797 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8798 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008799 if (ctxt->encoding != NULL)
8800 xmlFree((xmlChar *) ctxt->encoding);
8801 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008802 }
8803 /*
8804 * UTF-8 encoding is handled natively
8805 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008806 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008807 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8808 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008809 if (ctxt->encoding != NULL)
8810 xmlFree((xmlChar *) ctxt->encoding);
8811 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008812 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008813 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008814 xmlCharEncodingHandlerPtr handler;
8815
8816 if (ctxt->input->encoding != NULL)
8817 xmlFree((xmlChar *) ctxt->input->encoding);
8818 ctxt->input->encoding = encoding;
8819
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008820 handler = xmlFindCharEncodingHandler((const char *) encoding);
8821 if (handler != NULL) {
8822 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008823 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008824 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008825 "Unsupported encoding %s\n", encoding);
8826 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008827 }
8828 }
8829 }
8830 return(encoding);
8831}
8832
8833/**
8834 * xmlParseSDDecl:
8835 * @ctxt: an XML parser context
8836 *
8837 * parse the XML standalone declaration
8838 *
8839 * [32] SDDecl ::= S 'standalone' Eq
8840 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8841 *
8842 * [ VC: Standalone Document Declaration ]
8843 * TODO The standalone document declaration must have the value "no"
8844 * if any external markup declarations contain declarations of:
8845 * - attributes with default values, if elements to which these
8846 * attributes apply appear in the document without specifications
8847 * of values for these attributes, or
8848 * - entities (other than amp, lt, gt, apos, quot), if references
8849 * to those entities appear in the document, or
8850 * - attributes with values subject to normalization, where the
8851 * attribute appears in the document with a value which will change
8852 * as a result of normalization, or
8853 * - element types with element content, if white space occurs directly
8854 * within any instance of those types.
8855 *
8856 * Returns 1 if standalone, 0 otherwise
8857 */
8858
8859int
8860xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8861 int standalone = -1;
8862
8863 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008864 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008865 SKIP(10);
8866 SKIP_BLANKS;
8867 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008869 return(standalone);
8870 }
8871 NEXT;
8872 SKIP_BLANKS;
8873 if (RAW == '\''){
8874 NEXT;
8875 if ((RAW == 'n') && (NXT(1) == 'o')) {
8876 standalone = 0;
8877 SKIP(2);
8878 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8879 (NXT(2) == 's')) {
8880 standalone = 1;
8881 SKIP(3);
8882 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008883 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008884 }
8885 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008886 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008887 } else
8888 NEXT;
8889 } else if (RAW == '"'){
8890 NEXT;
8891 if ((RAW == 'n') && (NXT(1) == 'o')) {
8892 standalone = 0;
8893 SKIP(2);
8894 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8895 (NXT(2) == 's')) {
8896 standalone = 1;
8897 SKIP(3);
8898 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008899 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008900 }
8901 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008902 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008903 } else
8904 NEXT;
8905 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008906 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008907 }
8908 }
8909 return(standalone);
8910}
8911
8912/**
8913 * xmlParseXMLDecl:
8914 * @ctxt: an XML parser context
8915 *
8916 * parse an XML declaration header
8917 *
8918 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8919 */
8920
8921void
8922xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8923 xmlChar *version;
8924
8925 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008926 * This value for standalone indicates that the document has an
8927 * XML declaration but it does not have a standalone attribute.
8928 * It will be overwritten later if a standalone attribute is found.
8929 */
8930 ctxt->input->standalone = -2;
8931
8932 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008933 * We know that '<?xml' is here.
8934 */
8935 SKIP(5);
8936
William M. Brack76e95df2003-10-18 16:20:14 +00008937 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8939 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008940 }
8941 SKIP_BLANKS;
8942
8943 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008944 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008945 */
8946 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008947 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008948 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008949 } else {
8950 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8951 /*
8952 * TODO: Blueberry should be detected here
8953 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008954 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8955 "Unsupported version '%s'\n",
8956 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008957 }
8958 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008959 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008960 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008961 }
Owen Taylor3473f882001-02-23 17:55:21 +00008962
8963 /*
8964 * We may have the encoding declaration
8965 */
William M. Brack76e95df2003-10-18 16:20:14 +00008966 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008967 if ((RAW == '?') && (NXT(1) == '>')) {
8968 SKIP(2);
8969 return;
8970 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008971 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008972 }
8973 xmlParseEncodingDecl(ctxt);
8974 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8975 /*
8976 * The XML REC instructs us to stop parsing right here
8977 */
8978 return;
8979 }
8980
8981 /*
8982 * We may have the standalone status.
8983 */
William M. Brack76e95df2003-10-18 16:20:14 +00008984 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008985 if ((RAW == '?') && (NXT(1) == '>')) {
8986 SKIP(2);
8987 return;
8988 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008989 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008990 }
8991 SKIP_BLANKS;
8992 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8993
8994 SKIP_BLANKS;
8995 if ((RAW == '?') && (NXT(1) == '>')) {
8996 SKIP(2);
8997 } else if (RAW == '>') {
8998 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008999 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009000 NEXT;
9001 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009002 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009003 MOVETO_ENDTAG(CUR_PTR);
9004 NEXT;
9005 }
9006}
9007
9008/**
9009 * xmlParseMisc:
9010 * @ctxt: an XML parser context
9011 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009012 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009013 *
9014 * [27] Misc ::= Comment | PI | S
9015 */
9016
9017void
9018xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009019 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009020 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009021 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009022 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009023 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009024 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009025 NEXT;
9026 } else
9027 xmlParseComment(ctxt);
9028 }
9029}
9030
9031/**
9032 * xmlParseDocument:
9033 * @ctxt: an XML parser context
9034 *
9035 * parse an XML document (and build a tree if using the standard SAX
9036 * interface).
9037 *
9038 * [1] document ::= prolog element Misc*
9039 *
9040 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9041 *
9042 * Returns 0, -1 in case of error. the parser context is augmented
9043 * as a result of the parsing.
9044 */
9045
9046int
9047xmlParseDocument(xmlParserCtxtPtr ctxt) {
9048 xmlChar start[4];
9049 xmlCharEncoding enc;
9050
9051 xmlInitParser();
9052
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009053 if ((ctxt == NULL) || (ctxt->input == NULL))
9054 return(-1);
9055
Owen Taylor3473f882001-02-23 17:55:21 +00009056 GROW;
9057
9058 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 * SAX: detecting the level.
9060 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009061 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062
9063 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009064 * SAX: beginning of the document processing.
9065 */
9066 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9067 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9068
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009069 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9070 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009071 /*
9072 * Get the 4 first bytes and decode the charset
9073 * if enc != XML_CHAR_ENCODING_NONE
9074 * plug some encoding conversion routines.
9075 */
9076 start[0] = RAW;
9077 start[1] = NXT(1);
9078 start[2] = NXT(2);
9079 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009080 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009081 if (enc != XML_CHAR_ENCODING_NONE) {
9082 xmlSwitchEncoding(ctxt, enc);
9083 }
Owen Taylor3473f882001-02-23 17:55:21 +00009084 }
9085
9086
9087 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009088 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009089 }
9090
9091 /*
9092 * Check for the XMLDecl in the Prolog.
9093 */
9094 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009095 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009096
9097 /*
9098 * Note that we will switch encoding on the fly.
9099 */
9100 xmlParseXMLDecl(ctxt);
9101 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9102 /*
9103 * The XML REC instructs us to stop parsing right here
9104 */
9105 return(-1);
9106 }
9107 ctxt->standalone = ctxt->input->standalone;
9108 SKIP_BLANKS;
9109 } else {
9110 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9111 }
9112 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9113 ctxt->sax->startDocument(ctxt->userData);
9114
9115 /*
9116 * The Misc part of the Prolog
9117 */
9118 GROW;
9119 xmlParseMisc(ctxt);
9120
9121 /*
9122 * Then possibly doc type declaration(s) and more Misc
9123 * (doctypedecl Misc*)?
9124 */
9125 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009126 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009127
9128 ctxt->inSubset = 1;
9129 xmlParseDocTypeDecl(ctxt);
9130 if (RAW == '[') {
9131 ctxt->instate = XML_PARSER_DTD;
9132 xmlParseInternalSubset(ctxt);
9133 }
9134
9135 /*
9136 * Create and update the external subset.
9137 */
9138 ctxt->inSubset = 2;
9139 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9140 (!ctxt->disableSAX))
9141 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9142 ctxt->extSubSystem, ctxt->extSubURI);
9143 ctxt->inSubset = 0;
9144
9145
9146 ctxt->instate = XML_PARSER_PROLOG;
9147 xmlParseMisc(ctxt);
9148 }
9149
9150 /*
9151 * Time to start parsing the tree itself
9152 */
9153 GROW;
9154 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009155 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9156 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009157 } else {
9158 ctxt->instate = XML_PARSER_CONTENT;
9159 xmlParseElement(ctxt);
9160 ctxt->instate = XML_PARSER_EPILOG;
9161
9162
9163 /*
9164 * The Misc part at the end
9165 */
9166 xmlParseMisc(ctxt);
9167
Daniel Veillard561b7f82002-03-20 21:55:57 +00009168 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009169 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009170 }
9171 ctxt->instate = XML_PARSER_EOF;
9172 }
9173
9174 /*
9175 * SAX: end of the document processing.
9176 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009177 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009178 ctxt->sax->endDocument(ctxt->userData);
9179
Daniel Veillard5997aca2002-03-18 18:36:20 +00009180 /*
9181 * Remove locally kept entity definitions if the tree was not built
9182 */
9183 if ((ctxt->myDoc != NULL) &&
9184 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9185 xmlFreeDoc(ctxt->myDoc);
9186 ctxt->myDoc = NULL;
9187 }
9188
Daniel Veillardc7612992002-02-17 22:47:37 +00009189 if (! ctxt->wellFormed) {
9190 ctxt->valid = 0;
9191 return(-1);
9192 }
Owen Taylor3473f882001-02-23 17:55:21 +00009193 return(0);
9194}
9195
9196/**
9197 * xmlParseExtParsedEnt:
9198 * @ctxt: an XML parser context
9199 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009200 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009201 * An external general parsed entity is well-formed if it matches the
9202 * production labeled extParsedEnt.
9203 *
9204 * [78] extParsedEnt ::= TextDecl? content
9205 *
9206 * Returns 0, -1 in case of error. the parser context is augmented
9207 * as a result of the parsing.
9208 */
9209
9210int
9211xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9212 xmlChar start[4];
9213 xmlCharEncoding enc;
9214
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009215 if ((ctxt == NULL) || (ctxt->input == NULL))
9216 return(-1);
9217
Owen Taylor3473f882001-02-23 17:55:21 +00009218 xmlDefaultSAXHandlerInit();
9219
Daniel Veillard309f81d2003-09-23 09:02:53 +00009220 xmlDetectSAX2(ctxt);
9221
Owen Taylor3473f882001-02-23 17:55:21 +00009222 GROW;
9223
9224 /*
9225 * SAX: beginning of the document processing.
9226 */
9227 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9228 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9229
9230 /*
9231 * Get the 4 first bytes and decode the charset
9232 * if enc != XML_CHAR_ENCODING_NONE
9233 * plug some encoding conversion routines.
9234 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009235 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9236 start[0] = RAW;
9237 start[1] = NXT(1);
9238 start[2] = NXT(2);
9239 start[3] = NXT(3);
9240 enc = xmlDetectCharEncoding(start, 4);
9241 if (enc != XML_CHAR_ENCODING_NONE) {
9242 xmlSwitchEncoding(ctxt, enc);
9243 }
Owen Taylor3473f882001-02-23 17:55:21 +00009244 }
9245
9246
9247 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009248 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009249 }
9250
9251 /*
9252 * Check for the XMLDecl in the Prolog.
9253 */
9254 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009255 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009256
9257 /*
9258 * Note that we will switch encoding on the fly.
9259 */
9260 xmlParseXMLDecl(ctxt);
9261 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9262 /*
9263 * The XML REC instructs us to stop parsing right here
9264 */
9265 return(-1);
9266 }
9267 SKIP_BLANKS;
9268 } else {
9269 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9270 }
9271 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9272 ctxt->sax->startDocument(ctxt->userData);
9273
9274 /*
9275 * Doing validity checking on chunk doesn't make sense
9276 */
9277 ctxt->instate = XML_PARSER_CONTENT;
9278 ctxt->validate = 0;
9279 ctxt->loadsubset = 0;
9280 ctxt->depth = 0;
9281
9282 xmlParseContent(ctxt);
9283
9284 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009285 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009286 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009287 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009288 }
9289
9290 /*
9291 * SAX: end of the document processing.
9292 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009293 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009294 ctxt->sax->endDocument(ctxt->userData);
9295
9296 if (! ctxt->wellFormed) return(-1);
9297 return(0);
9298}
9299
Daniel Veillard73b013f2003-09-30 12:36:01 +00009300#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009301/************************************************************************
9302 * *
9303 * Progressive parsing interfaces *
9304 * *
9305 ************************************************************************/
9306
9307/**
9308 * xmlParseLookupSequence:
9309 * @ctxt: an XML parser context
9310 * @first: the first char to lookup
9311 * @next: the next char to lookup or zero
9312 * @third: the next char to lookup or zero
9313 *
9314 * Try to find if a sequence (first, next, third) or just (first next) or
9315 * (first) is available in the input stream.
9316 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9317 * to avoid rescanning sequences of bytes, it DOES change the state of the
9318 * parser, do not use liberally.
9319 *
9320 * Returns the index to the current parsing point if the full sequence
9321 * is available, -1 otherwise.
9322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009323static int
Owen Taylor3473f882001-02-23 17:55:21 +00009324xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9325 xmlChar next, xmlChar third) {
9326 int base, len;
9327 xmlParserInputPtr in;
9328 const xmlChar *buf;
9329
9330 in = ctxt->input;
9331 if (in == NULL) return(-1);
9332 base = in->cur - in->base;
9333 if (base < 0) return(-1);
9334 if (ctxt->checkIndex > base)
9335 base = ctxt->checkIndex;
9336 if (in->buf == NULL) {
9337 buf = in->base;
9338 len = in->length;
9339 } else {
9340 buf = in->buf->buffer->content;
9341 len = in->buf->buffer->use;
9342 }
9343 /* take into account the sequence length */
9344 if (third) len -= 2;
9345 else if (next) len --;
9346 for (;base < len;base++) {
9347 if (buf[base] == first) {
9348 if (third != 0) {
9349 if ((buf[base + 1] != next) ||
9350 (buf[base + 2] != third)) continue;
9351 } else if (next != 0) {
9352 if (buf[base + 1] != next) continue;
9353 }
9354 ctxt->checkIndex = 0;
9355#ifdef DEBUG_PUSH
9356 if (next == 0)
9357 xmlGenericError(xmlGenericErrorContext,
9358 "PP: lookup '%c' found at %d\n",
9359 first, base);
9360 else if (third == 0)
9361 xmlGenericError(xmlGenericErrorContext,
9362 "PP: lookup '%c%c' found at %d\n",
9363 first, next, base);
9364 else
9365 xmlGenericError(xmlGenericErrorContext,
9366 "PP: lookup '%c%c%c' found at %d\n",
9367 first, next, third, base);
9368#endif
9369 return(base - (in->cur - in->base));
9370 }
9371 }
9372 ctxt->checkIndex = base;
9373#ifdef DEBUG_PUSH
9374 if (next == 0)
9375 xmlGenericError(xmlGenericErrorContext,
9376 "PP: lookup '%c' failed\n", first);
9377 else if (third == 0)
9378 xmlGenericError(xmlGenericErrorContext,
9379 "PP: lookup '%c%c' failed\n", first, next);
9380 else
9381 xmlGenericError(xmlGenericErrorContext,
9382 "PP: lookup '%c%c%c' failed\n", first, next, third);
9383#endif
9384 return(-1);
9385}
9386
9387/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009388 * xmlParseGetLasts:
9389 * @ctxt: an XML parser context
9390 * @lastlt: pointer to store the last '<' from the input
9391 * @lastgt: pointer to store the last '>' from the input
9392 *
9393 * Lookup the last < and > in the current chunk
9394 */
9395static void
9396xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9397 const xmlChar **lastgt) {
9398 const xmlChar *tmp;
9399
9400 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9401 xmlGenericError(xmlGenericErrorContext,
9402 "Internal error: xmlParseGetLasts\n");
9403 return;
9404 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009405 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 tmp = ctxt->input->end;
9407 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009408 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009409 if (tmp < ctxt->input->base) {
9410 *lastlt = NULL;
9411 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009412 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009413 *lastlt = tmp;
9414 tmp++;
9415 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9416 if (*tmp == '\'') {
9417 tmp++;
9418 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9419 if (tmp < ctxt->input->end) tmp++;
9420 } else if (*tmp == '"') {
9421 tmp++;
9422 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9423 if (tmp < ctxt->input->end) tmp++;
9424 } else
9425 tmp++;
9426 }
9427 if (tmp < ctxt->input->end)
9428 *lastgt = tmp;
9429 else {
9430 tmp = *lastlt;
9431 tmp--;
9432 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9433 if (tmp >= ctxt->input->base)
9434 *lastgt = tmp;
9435 else
9436 *lastgt = NULL;
9437 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009438 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009439 } else {
9440 *lastlt = NULL;
9441 *lastgt = NULL;
9442 }
9443}
9444/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009445 * xmlCheckCdataPush:
9446 * @cur: pointer to the bock of characters
9447 * @len: length of the block in bytes
9448 *
9449 * Check that the block of characters is okay as SCdata content [20]
9450 *
9451 * Returns the number of bytes to pass if okay, a negative index where an
9452 * UTF-8 error occured otherwise
9453 */
9454static int
9455xmlCheckCdataPush(const xmlChar *utf, int len) {
9456 int ix;
9457 unsigned char c;
9458 int codepoint;
9459
9460 if ((utf == NULL) || (len <= 0))
9461 return(0);
9462
9463 for (ix = 0; ix < len;) { /* string is 0-terminated */
9464 c = utf[ix];
9465 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9466 if (c >= 0x20)
9467 ix++;
9468 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9469 ix++;
9470 else
9471 return(-ix);
9472 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9473 if (ix + 2 > len) return(ix);
9474 if ((utf[ix+1] & 0xc0 ) != 0x80)
9475 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009476 codepoint = (utf[ix] & 0x1f) << 6;
9477 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009478 if (!xmlIsCharQ(codepoint))
9479 return(-ix);
9480 ix += 2;
9481 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9482 if (ix + 3 > len) return(ix);
9483 if (((utf[ix+1] & 0xc0) != 0x80) ||
9484 ((utf[ix+2] & 0xc0) != 0x80))
9485 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009486 codepoint = (utf[ix] & 0xf) << 12;
9487 codepoint |= (utf[ix+1] & 0x3f) << 6;
9488 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009489 if (!xmlIsCharQ(codepoint))
9490 return(-ix);
9491 ix += 3;
9492 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9493 if (ix + 4 > len) return(ix);
9494 if (((utf[ix+1] & 0xc0) != 0x80) ||
9495 ((utf[ix+2] & 0xc0) != 0x80) ||
9496 ((utf[ix+3] & 0xc0) != 0x80))
9497 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009498 codepoint = (utf[ix] & 0x7) << 18;
9499 codepoint |= (utf[ix+1] & 0x3f) << 12;
9500 codepoint |= (utf[ix+2] & 0x3f) << 6;
9501 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009502 if (!xmlIsCharQ(codepoint))
9503 return(-ix);
9504 ix += 4;
9505 } else /* unknown encoding */
9506 return(-ix);
9507 }
9508 return(ix);
9509}
9510
9511/**
Owen Taylor3473f882001-02-23 17:55:21 +00009512 * xmlParseTryOrFinish:
9513 * @ctxt: an XML parser context
9514 * @terminate: last chunk indicator
9515 *
9516 * Try to progress on parsing
9517 *
9518 * Returns zero if no parsing was possible
9519 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009520static int
Owen Taylor3473f882001-02-23 17:55:21 +00009521xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9522 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009523 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009524 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009525 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009526
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009527 if (ctxt->input == NULL)
9528 return(0);
9529
Owen Taylor3473f882001-02-23 17:55:21 +00009530#ifdef DEBUG_PUSH
9531 switch (ctxt->instate) {
9532 case XML_PARSER_EOF:
9533 xmlGenericError(xmlGenericErrorContext,
9534 "PP: try EOF\n"); break;
9535 case XML_PARSER_START:
9536 xmlGenericError(xmlGenericErrorContext,
9537 "PP: try START\n"); break;
9538 case XML_PARSER_MISC:
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: try MISC\n");break;
9541 case XML_PARSER_COMMENT:
9542 xmlGenericError(xmlGenericErrorContext,
9543 "PP: try COMMENT\n");break;
9544 case XML_PARSER_PROLOG:
9545 xmlGenericError(xmlGenericErrorContext,
9546 "PP: try PROLOG\n");break;
9547 case XML_PARSER_START_TAG:
9548 xmlGenericError(xmlGenericErrorContext,
9549 "PP: try START_TAG\n");break;
9550 case XML_PARSER_CONTENT:
9551 xmlGenericError(xmlGenericErrorContext,
9552 "PP: try CONTENT\n");break;
9553 case XML_PARSER_CDATA_SECTION:
9554 xmlGenericError(xmlGenericErrorContext,
9555 "PP: try CDATA_SECTION\n");break;
9556 case XML_PARSER_END_TAG:
9557 xmlGenericError(xmlGenericErrorContext,
9558 "PP: try END_TAG\n");break;
9559 case XML_PARSER_ENTITY_DECL:
9560 xmlGenericError(xmlGenericErrorContext,
9561 "PP: try ENTITY_DECL\n");break;
9562 case XML_PARSER_ENTITY_VALUE:
9563 xmlGenericError(xmlGenericErrorContext,
9564 "PP: try ENTITY_VALUE\n");break;
9565 case XML_PARSER_ATTRIBUTE_VALUE:
9566 xmlGenericError(xmlGenericErrorContext,
9567 "PP: try ATTRIBUTE_VALUE\n");break;
9568 case XML_PARSER_DTD:
9569 xmlGenericError(xmlGenericErrorContext,
9570 "PP: try DTD\n");break;
9571 case XML_PARSER_EPILOG:
9572 xmlGenericError(xmlGenericErrorContext,
9573 "PP: try EPILOG\n");break;
9574 case XML_PARSER_PI:
9575 xmlGenericError(xmlGenericErrorContext,
9576 "PP: try PI\n");break;
9577 case XML_PARSER_IGNORE:
9578 xmlGenericError(xmlGenericErrorContext,
9579 "PP: try IGNORE\n");break;
9580 }
9581#endif
9582
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009583 if ((ctxt->input != NULL) &&
9584 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009585 xmlSHRINK(ctxt);
9586 ctxt->checkIndex = 0;
9587 }
9588 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009589
Daniel Veillarda880b122003-04-21 21:36:41 +00009590 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009591 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009592 return(0);
9593
9594
Owen Taylor3473f882001-02-23 17:55:21 +00009595 /*
9596 * Pop-up of finished entities.
9597 */
9598 while ((RAW == 0) && (ctxt->inputNr > 1))
9599 xmlPopInput(ctxt);
9600
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009601 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009602 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009603 avail = ctxt->input->length -
9604 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009605 else {
9606 /*
9607 * If we are operating on converted input, try to flush
9608 * remainng chars to avoid them stalling in the non-converted
9609 * buffer.
9610 */
9611 if ((ctxt->input->buf->raw != NULL) &&
9612 (ctxt->input->buf->raw->use > 0)) {
9613 int base = ctxt->input->base -
9614 ctxt->input->buf->buffer->content;
9615 int current = ctxt->input->cur - ctxt->input->base;
9616
9617 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9618 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9619 ctxt->input->cur = ctxt->input->base + current;
9620 ctxt->input->end =
9621 &ctxt->input->buf->buffer->content[
9622 ctxt->input->buf->buffer->use];
9623 }
9624 avail = ctxt->input->buf->buffer->use -
9625 (ctxt->input->cur - ctxt->input->base);
9626 }
Owen Taylor3473f882001-02-23 17:55:21 +00009627 if (avail < 1)
9628 goto done;
9629 switch (ctxt->instate) {
9630 case XML_PARSER_EOF:
9631 /*
9632 * Document parsing is done !
9633 */
9634 goto done;
9635 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009636 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9637 xmlChar start[4];
9638 xmlCharEncoding enc;
9639
9640 /*
9641 * Very first chars read from the document flow.
9642 */
9643 if (avail < 4)
9644 goto done;
9645
9646 /*
9647 * Get the 4 first bytes and decode the charset
9648 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009649 * plug some encoding conversion routines,
9650 * else xmlSwitchEncoding will set to (default)
9651 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009652 */
9653 start[0] = RAW;
9654 start[1] = NXT(1);
9655 start[2] = NXT(2);
9656 start[3] = NXT(3);
9657 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009658 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009659 break;
9660 }
Owen Taylor3473f882001-02-23 17:55:21 +00009661
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009662 if (avail < 2)
9663 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009664 cur = ctxt->input->cur[0];
9665 next = ctxt->input->cur[1];
9666 if (cur == 0) {
9667 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9668 ctxt->sax->setDocumentLocator(ctxt->userData,
9669 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009670 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009671 ctxt->instate = XML_PARSER_EOF;
9672#ifdef DEBUG_PUSH
9673 xmlGenericError(xmlGenericErrorContext,
9674 "PP: entering EOF\n");
9675#endif
9676 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9677 ctxt->sax->endDocument(ctxt->userData);
9678 goto done;
9679 }
9680 if ((cur == '<') && (next == '?')) {
9681 /* PI or XML decl */
9682 if (avail < 5) return(ret);
9683 if ((!terminate) &&
9684 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9685 return(ret);
9686 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9687 ctxt->sax->setDocumentLocator(ctxt->userData,
9688 &xmlDefaultSAXLocator);
9689 if ((ctxt->input->cur[2] == 'x') &&
9690 (ctxt->input->cur[3] == 'm') &&
9691 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009692 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009693 ret += 5;
9694#ifdef DEBUG_PUSH
9695 xmlGenericError(xmlGenericErrorContext,
9696 "PP: Parsing XML Decl\n");
9697#endif
9698 xmlParseXMLDecl(ctxt);
9699 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9700 /*
9701 * The XML REC instructs us to stop parsing right
9702 * here
9703 */
9704 ctxt->instate = XML_PARSER_EOF;
9705 return(0);
9706 }
9707 ctxt->standalone = ctxt->input->standalone;
9708 if ((ctxt->encoding == NULL) &&
9709 (ctxt->input->encoding != NULL))
9710 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9711 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9712 (!ctxt->disableSAX))
9713 ctxt->sax->startDocument(ctxt->userData);
9714 ctxt->instate = XML_PARSER_MISC;
9715#ifdef DEBUG_PUSH
9716 xmlGenericError(xmlGenericErrorContext,
9717 "PP: entering MISC\n");
9718#endif
9719 } else {
9720 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9721 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9722 (!ctxt->disableSAX))
9723 ctxt->sax->startDocument(ctxt->userData);
9724 ctxt->instate = XML_PARSER_MISC;
9725#ifdef DEBUG_PUSH
9726 xmlGenericError(xmlGenericErrorContext,
9727 "PP: entering MISC\n");
9728#endif
9729 }
9730 } else {
9731 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9732 ctxt->sax->setDocumentLocator(ctxt->userData,
9733 &xmlDefaultSAXLocator);
9734 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009735 if (ctxt->version == NULL) {
9736 xmlErrMemory(ctxt, NULL);
9737 break;
9738 }
Owen Taylor3473f882001-02-23 17:55:21 +00009739 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9740 (!ctxt->disableSAX))
9741 ctxt->sax->startDocument(ctxt->userData);
9742 ctxt->instate = XML_PARSER_MISC;
9743#ifdef DEBUG_PUSH
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: entering MISC\n");
9746#endif
9747 }
9748 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009749 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009750 const xmlChar *name;
9751 const xmlChar *prefix;
9752 const xmlChar *URI;
9753 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009754
9755 if ((avail < 2) && (ctxt->inputNr == 1))
9756 goto done;
9757 cur = ctxt->input->cur[0];
9758 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009759 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009760 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009761 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9762 ctxt->sax->endDocument(ctxt->userData);
9763 goto done;
9764 }
9765 if (!terminate) {
9766 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009767 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009768 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009769 goto done;
9770 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9771 goto done;
9772 }
9773 }
9774 if (ctxt->spaceNr == 0)
9775 spacePush(ctxt, -1);
9776 else
9777 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009778#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009779 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009780#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009781 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009782#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009783 else
9784 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009785#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009786 if (name == NULL) {
9787 spacePop(ctxt);
9788 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009789 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9790 ctxt->sax->endDocument(ctxt->userData);
9791 goto done;
9792 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009793#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009794 /*
9795 * [ VC: Root Element Type ]
9796 * The Name in the document type declaration must match
9797 * the element type of the root element.
9798 */
9799 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9800 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9801 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009802#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009803
9804 /*
9805 * Check for an Empty Element.
9806 */
9807 if ((RAW == '/') && (NXT(1) == '>')) {
9808 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009809
9810 if (ctxt->sax2) {
9811 if ((ctxt->sax != NULL) &&
9812 (ctxt->sax->endElementNs != NULL) &&
9813 (!ctxt->disableSAX))
9814 ctxt->sax->endElementNs(ctxt->userData, name,
9815 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009816 if (ctxt->nsNr - nsNr > 0)
9817 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009818#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009819 } else {
9820 if ((ctxt->sax != NULL) &&
9821 (ctxt->sax->endElement != NULL) &&
9822 (!ctxt->disableSAX))
9823 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009824#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009825 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009826 spacePop(ctxt);
9827 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009828 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009829 } else {
9830 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009831 }
9832 break;
9833 }
9834 if (RAW == '>') {
9835 NEXT;
9836 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009837 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 "Couldn't find end of Start Tag %s\n",
9839 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009840 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009841 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009842 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009843 if (ctxt->sax2)
9844 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009845#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009846 else
9847 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009848#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009849
Daniel Veillarda880b122003-04-21 21:36:41 +00009850 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009851 break;
9852 }
9853 case XML_PARSER_CONTENT: {
9854 const xmlChar *test;
9855 unsigned int cons;
9856 if ((avail < 2) && (ctxt->inputNr == 1))
9857 goto done;
9858 cur = ctxt->input->cur[0];
9859 next = ctxt->input->cur[1];
9860
9861 test = CUR_PTR;
9862 cons = ctxt->input->consumed;
9863 if ((cur == '<') && (next == '/')) {
9864 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009865 break;
9866 } else if ((cur == '<') && (next == '?')) {
9867 if ((!terminate) &&
9868 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9869 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009870 xmlParsePI(ctxt);
9871 } else if ((cur == '<') && (next != '!')) {
9872 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009873 break;
9874 } else if ((cur == '<') && (next == '!') &&
9875 (ctxt->input->cur[2] == '-') &&
9876 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009877 int term;
9878
9879 if (avail < 4)
9880 goto done;
9881 ctxt->input->cur += 4;
9882 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9883 ctxt->input->cur -= 4;
9884 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009885 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009886 xmlParseComment(ctxt);
9887 ctxt->instate = XML_PARSER_CONTENT;
9888 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9889 (ctxt->input->cur[2] == '[') &&
9890 (ctxt->input->cur[3] == 'C') &&
9891 (ctxt->input->cur[4] == 'D') &&
9892 (ctxt->input->cur[5] == 'A') &&
9893 (ctxt->input->cur[6] == 'T') &&
9894 (ctxt->input->cur[7] == 'A') &&
9895 (ctxt->input->cur[8] == '[')) {
9896 SKIP(9);
9897 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009898 break;
9899 } else if ((cur == '<') && (next == '!') &&
9900 (avail < 9)) {
9901 goto done;
9902 } else if (cur == '&') {
9903 if ((!terminate) &&
9904 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9905 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009906 xmlParseReference(ctxt);
9907 } else {
9908 /* TODO Avoid the extra copy, handle directly !!! */
9909 /*
9910 * Goal of the following test is:
9911 * - minimize calls to the SAX 'character' callback
9912 * when they are mergeable
9913 * - handle a problem for isBlank when we only parse
9914 * a sequence of blank chars and the next one is
9915 * not available to check against '<' presence.
9916 * - tries to homogenize the differences in SAX
9917 * callbacks between the push and pull versions
9918 * of the parser.
9919 */
9920 if ((ctxt->inputNr == 1) &&
9921 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9922 if (!terminate) {
9923 if (ctxt->progressive) {
9924 if ((lastlt == NULL) ||
9925 (ctxt->input->cur > lastlt))
9926 goto done;
9927 } else if (xmlParseLookupSequence(ctxt,
9928 '<', 0, 0) < 0) {
9929 goto done;
9930 }
9931 }
9932 }
9933 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009934 xmlParseCharData(ctxt, 0);
9935 }
9936 /*
9937 * Pop-up of finished entities.
9938 */
9939 while ((RAW == 0) && (ctxt->inputNr > 1))
9940 xmlPopInput(ctxt);
9941 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009942 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9943 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009944 ctxt->instate = XML_PARSER_EOF;
9945 break;
9946 }
9947 break;
9948 }
9949 case XML_PARSER_END_TAG:
9950 if (avail < 2)
9951 goto done;
9952 if (!terminate) {
9953 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009954 /* > can be found unescaped in attribute values */
9955 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009956 goto done;
9957 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9958 goto done;
9959 }
9960 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009961 if (ctxt->sax2) {
9962 xmlParseEndTag2(ctxt,
9963 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9964 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009965 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009966 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009967 }
9968#ifdef LIBXML_SAX1_ENABLED
9969 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009970 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009971#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009972 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009973 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009974 } else {
9975 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009976 }
9977 break;
9978 case XML_PARSER_CDATA_SECTION: {
9979 /*
9980 * The Push mode need to have the SAX callback for
9981 * cdataBlock merge back contiguous callbacks.
9982 */
9983 int base;
9984
9985 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9986 if (base < 0) {
9987 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009988 int tmp;
9989
9990 tmp = xmlCheckCdataPush(ctxt->input->cur,
9991 XML_PARSER_BIG_BUFFER_SIZE);
9992 if (tmp < 0) {
9993 tmp = -tmp;
9994 ctxt->input->cur += tmp;
9995 goto encoding_error;
9996 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009997 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9998 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009999 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010000 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010001 else if (ctxt->sax->characters != NULL)
10002 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010003 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010004 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010005 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010006 ctxt->checkIndex = 0;
10007 }
10008 goto done;
10009 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010010 int tmp;
10011
10012 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10013 if ((tmp < 0) || (tmp != base)) {
10014 tmp = -tmp;
10015 ctxt->input->cur += tmp;
10016 goto encoding_error;
10017 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010018 if ((ctxt->sax != NULL) && (base > 0) &&
10019 (!ctxt->disableSAX)) {
10020 if (ctxt->sax->cdataBlock != NULL)
10021 ctxt->sax->cdataBlock(ctxt->userData,
10022 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010023 else if (ctxt->sax->characters != NULL)
10024 ctxt->sax->characters(ctxt->userData,
10025 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010026 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010027 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010028 ctxt->checkIndex = 0;
10029 ctxt->instate = XML_PARSER_CONTENT;
10030#ifdef DEBUG_PUSH
10031 xmlGenericError(xmlGenericErrorContext,
10032 "PP: entering CONTENT\n");
10033#endif
10034 }
10035 break;
10036 }
Owen Taylor3473f882001-02-23 17:55:21 +000010037 case XML_PARSER_MISC:
10038 SKIP_BLANKS;
10039 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010040 avail = ctxt->input->length -
10041 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010042 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010043 avail = ctxt->input->buf->buffer->use -
10044 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010045 if (avail < 2)
10046 goto done;
10047 cur = ctxt->input->cur[0];
10048 next = ctxt->input->cur[1];
10049 if ((cur == '<') && (next == '?')) {
10050 if ((!terminate) &&
10051 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10052 goto done;
10053#ifdef DEBUG_PUSH
10054 xmlGenericError(xmlGenericErrorContext,
10055 "PP: Parsing PI\n");
10056#endif
10057 xmlParsePI(ctxt);
10058 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010059 (ctxt->input->cur[2] == '-') &&
10060 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010061 if ((!terminate) &&
10062 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10063 goto done;
10064#ifdef DEBUG_PUSH
10065 xmlGenericError(xmlGenericErrorContext,
10066 "PP: Parsing Comment\n");
10067#endif
10068 xmlParseComment(ctxt);
10069 ctxt->instate = XML_PARSER_MISC;
10070 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010071 (ctxt->input->cur[2] == 'D') &&
10072 (ctxt->input->cur[3] == 'O') &&
10073 (ctxt->input->cur[4] == 'C') &&
10074 (ctxt->input->cur[5] == 'T') &&
10075 (ctxt->input->cur[6] == 'Y') &&
10076 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010077 (ctxt->input->cur[8] == 'E')) {
10078 if ((!terminate) &&
10079 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10080 goto done;
10081#ifdef DEBUG_PUSH
10082 xmlGenericError(xmlGenericErrorContext,
10083 "PP: Parsing internal subset\n");
10084#endif
10085 ctxt->inSubset = 1;
10086 xmlParseDocTypeDecl(ctxt);
10087 if (RAW == '[') {
10088 ctxt->instate = XML_PARSER_DTD;
10089#ifdef DEBUG_PUSH
10090 xmlGenericError(xmlGenericErrorContext,
10091 "PP: entering DTD\n");
10092#endif
10093 } else {
10094 /*
10095 * Create and update the external subset.
10096 */
10097 ctxt->inSubset = 2;
10098 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10099 (ctxt->sax->externalSubset != NULL))
10100 ctxt->sax->externalSubset(ctxt->userData,
10101 ctxt->intSubName, ctxt->extSubSystem,
10102 ctxt->extSubURI);
10103 ctxt->inSubset = 0;
10104 ctxt->instate = XML_PARSER_PROLOG;
10105#ifdef DEBUG_PUSH
10106 xmlGenericError(xmlGenericErrorContext,
10107 "PP: entering PROLOG\n");
10108#endif
10109 }
10110 } else if ((cur == '<') && (next == '!') &&
10111 (avail < 9)) {
10112 goto done;
10113 } else {
10114 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010115 ctxt->progressive = 1;
10116 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010117#ifdef DEBUG_PUSH
10118 xmlGenericError(xmlGenericErrorContext,
10119 "PP: entering START_TAG\n");
10120#endif
10121 }
10122 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010123 case XML_PARSER_PROLOG:
10124 SKIP_BLANKS;
10125 if (ctxt->input->buf == NULL)
10126 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10127 else
10128 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10129 if (avail < 2)
10130 goto done;
10131 cur = ctxt->input->cur[0];
10132 next = ctxt->input->cur[1];
10133 if ((cur == '<') && (next == '?')) {
10134 if ((!terminate) &&
10135 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10136 goto done;
10137#ifdef DEBUG_PUSH
10138 xmlGenericError(xmlGenericErrorContext,
10139 "PP: Parsing PI\n");
10140#endif
10141 xmlParsePI(ctxt);
10142 } else if ((cur == '<') && (next == '!') &&
10143 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10144 if ((!terminate) &&
10145 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10146 goto done;
10147#ifdef DEBUG_PUSH
10148 xmlGenericError(xmlGenericErrorContext,
10149 "PP: Parsing Comment\n");
10150#endif
10151 xmlParseComment(ctxt);
10152 ctxt->instate = XML_PARSER_PROLOG;
10153 } else if ((cur == '<') && (next == '!') &&
10154 (avail < 4)) {
10155 goto done;
10156 } else {
10157 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010158 if (ctxt->progressive == 0)
10159 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010160 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010161#ifdef DEBUG_PUSH
10162 xmlGenericError(xmlGenericErrorContext,
10163 "PP: entering START_TAG\n");
10164#endif
10165 }
10166 break;
10167 case XML_PARSER_EPILOG:
10168 SKIP_BLANKS;
10169 if (ctxt->input->buf == NULL)
10170 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10171 else
10172 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10173 if (avail < 2)
10174 goto done;
10175 cur = ctxt->input->cur[0];
10176 next = ctxt->input->cur[1];
10177 if ((cur == '<') && (next == '?')) {
10178 if ((!terminate) &&
10179 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10180 goto done;
10181#ifdef DEBUG_PUSH
10182 xmlGenericError(xmlGenericErrorContext,
10183 "PP: Parsing PI\n");
10184#endif
10185 xmlParsePI(ctxt);
10186 ctxt->instate = XML_PARSER_EPILOG;
10187 } else if ((cur == '<') && (next == '!') &&
10188 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10189 if ((!terminate) &&
10190 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10191 goto done;
10192#ifdef DEBUG_PUSH
10193 xmlGenericError(xmlGenericErrorContext,
10194 "PP: Parsing Comment\n");
10195#endif
10196 xmlParseComment(ctxt);
10197 ctxt->instate = XML_PARSER_EPILOG;
10198 } else if ((cur == '<') && (next == '!') &&
10199 (avail < 4)) {
10200 goto done;
10201 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010202 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010203 ctxt->instate = XML_PARSER_EOF;
10204#ifdef DEBUG_PUSH
10205 xmlGenericError(xmlGenericErrorContext,
10206 "PP: entering EOF\n");
10207#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010208 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010209 ctxt->sax->endDocument(ctxt->userData);
10210 goto done;
10211 }
10212 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010213 case XML_PARSER_DTD: {
10214 /*
10215 * Sorry but progressive parsing of the internal subset
10216 * is not expected to be supported. We first check that
10217 * the full content of the internal subset is available and
10218 * the parsing is launched only at that point.
10219 * Internal subset ends up with "']' S? '>'" in an unescaped
10220 * section and not in a ']]>' sequence which are conditional
10221 * sections (whoever argued to keep that crap in XML deserve
10222 * a place in hell !).
10223 */
10224 int base, i;
10225 xmlChar *buf;
10226 xmlChar quote = 0;
10227
10228 base = ctxt->input->cur - ctxt->input->base;
10229 if (base < 0) return(0);
10230 if (ctxt->checkIndex > base)
10231 base = ctxt->checkIndex;
10232 buf = ctxt->input->buf->buffer->content;
10233 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10234 base++) {
10235 if (quote != 0) {
10236 if (buf[base] == quote)
10237 quote = 0;
10238 continue;
10239 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010240 if ((quote == 0) && (buf[base] == '<')) {
10241 int found = 0;
10242 /* special handling of comments */
10243 if (((unsigned int) base + 4 <
10244 ctxt->input->buf->buffer->use) &&
10245 (buf[base + 1] == '!') &&
10246 (buf[base + 2] == '-') &&
10247 (buf[base + 3] == '-')) {
10248 for (;(unsigned int) base + 3 <
10249 ctxt->input->buf->buffer->use; base++) {
10250 if ((buf[base] == '-') &&
10251 (buf[base + 1] == '-') &&
10252 (buf[base + 2] == '>')) {
10253 found = 1;
10254 base += 2;
10255 break;
10256 }
10257 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010258 if (!found) {
10259#if 0
10260 fprintf(stderr, "unfinished comment\n");
10261#endif
10262 break; /* for */
10263 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010264 continue;
10265 }
10266 }
Owen Taylor3473f882001-02-23 17:55:21 +000010267 if (buf[base] == '"') {
10268 quote = '"';
10269 continue;
10270 }
10271 if (buf[base] == '\'') {
10272 quote = '\'';
10273 continue;
10274 }
10275 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010276#if 0
10277 fprintf(stderr, "%c%c%c%c: ", buf[base],
10278 buf[base + 1], buf[base + 2], buf[base + 3]);
10279#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010280 if ((unsigned int) base +1 >=
10281 ctxt->input->buf->buffer->use)
10282 break;
10283 if (buf[base + 1] == ']') {
10284 /* conditional crap, skip both ']' ! */
10285 base++;
10286 continue;
10287 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010288 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010289 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10290 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010291 if (buf[base + i] == '>') {
10292#if 0
10293 fprintf(stderr, "found\n");
10294#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010295 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010296 }
10297 if (!IS_BLANK_CH(buf[base + i])) {
10298#if 0
10299 fprintf(stderr, "not found\n");
10300#endif
10301 goto not_end_of_int_subset;
10302 }
Owen Taylor3473f882001-02-23 17:55:21 +000010303 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010304#if 0
10305 fprintf(stderr, "end of stream\n");
10306#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010307 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010308
Owen Taylor3473f882001-02-23 17:55:21 +000010309 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010310not_end_of_int_subset:
10311 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010312 }
10313 /*
10314 * We didn't find the end of the Internal subset
10315 */
Owen Taylor3473f882001-02-23 17:55:21 +000010316#ifdef DEBUG_PUSH
10317 if (next == 0)
10318 xmlGenericError(xmlGenericErrorContext,
10319 "PP: lookup of int subset end filed\n");
10320#endif
10321 goto done;
10322
10323found_end_int_subset:
10324 xmlParseInternalSubset(ctxt);
10325 ctxt->inSubset = 2;
10326 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10327 (ctxt->sax->externalSubset != NULL))
10328 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10329 ctxt->extSubSystem, ctxt->extSubURI);
10330 ctxt->inSubset = 0;
10331 ctxt->instate = XML_PARSER_PROLOG;
10332 ctxt->checkIndex = 0;
10333#ifdef DEBUG_PUSH
10334 xmlGenericError(xmlGenericErrorContext,
10335 "PP: entering PROLOG\n");
10336#endif
10337 break;
10338 }
10339 case XML_PARSER_COMMENT:
10340 xmlGenericError(xmlGenericErrorContext,
10341 "PP: internal error, state == COMMENT\n");
10342 ctxt->instate = XML_PARSER_CONTENT;
10343#ifdef DEBUG_PUSH
10344 xmlGenericError(xmlGenericErrorContext,
10345 "PP: entering CONTENT\n");
10346#endif
10347 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010348 case XML_PARSER_IGNORE:
10349 xmlGenericError(xmlGenericErrorContext,
10350 "PP: internal error, state == IGNORE");
10351 ctxt->instate = XML_PARSER_DTD;
10352#ifdef DEBUG_PUSH
10353 xmlGenericError(xmlGenericErrorContext,
10354 "PP: entering DTD\n");
10355#endif
10356 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010357 case XML_PARSER_PI:
10358 xmlGenericError(xmlGenericErrorContext,
10359 "PP: internal error, state == PI\n");
10360 ctxt->instate = XML_PARSER_CONTENT;
10361#ifdef DEBUG_PUSH
10362 xmlGenericError(xmlGenericErrorContext,
10363 "PP: entering CONTENT\n");
10364#endif
10365 break;
10366 case XML_PARSER_ENTITY_DECL:
10367 xmlGenericError(xmlGenericErrorContext,
10368 "PP: internal error, state == ENTITY_DECL\n");
10369 ctxt->instate = XML_PARSER_DTD;
10370#ifdef DEBUG_PUSH
10371 xmlGenericError(xmlGenericErrorContext,
10372 "PP: entering DTD\n");
10373#endif
10374 break;
10375 case XML_PARSER_ENTITY_VALUE:
10376 xmlGenericError(xmlGenericErrorContext,
10377 "PP: internal error, state == ENTITY_VALUE\n");
10378 ctxt->instate = XML_PARSER_CONTENT;
10379#ifdef DEBUG_PUSH
10380 xmlGenericError(xmlGenericErrorContext,
10381 "PP: entering DTD\n");
10382#endif
10383 break;
10384 case XML_PARSER_ATTRIBUTE_VALUE:
10385 xmlGenericError(xmlGenericErrorContext,
10386 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10387 ctxt->instate = XML_PARSER_START_TAG;
10388#ifdef DEBUG_PUSH
10389 xmlGenericError(xmlGenericErrorContext,
10390 "PP: entering START_TAG\n");
10391#endif
10392 break;
10393 case XML_PARSER_SYSTEM_LITERAL:
10394 xmlGenericError(xmlGenericErrorContext,
10395 "PP: internal error, state == SYSTEM_LITERAL\n");
10396 ctxt->instate = XML_PARSER_START_TAG;
10397#ifdef DEBUG_PUSH
10398 xmlGenericError(xmlGenericErrorContext,
10399 "PP: entering START_TAG\n");
10400#endif
10401 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010402 case XML_PARSER_PUBLIC_LITERAL:
10403 xmlGenericError(xmlGenericErrorContext,
10404 "PP: internal error, state == PUBLIC_LITERAL\n");
10405 ctxt->instate = XML_PARSER_START_TAG;
10406#ifdef DEBUG_PUSH
10407 xmlGenericError(xmlGenericErrorContext,
10408 "PP: entering START_TAG\n");
10409#endif
10410 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010411 }
10412 }
10413done:
10414#ifdef DEBUG_PUSH
10415 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10416#endif
10417 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010418encoding_error:
10419 {
10420 char buffer[150];
10421
10422 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10423 ctxt->input->cur[0], ctxt->input->cur[1],
10424 ctxt->input->cur[2], ctxt->input->cur[3]);
10425 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10426 "Input is not proper UTF-8, indicate encoding !\n%s",
10427 BAD_CAST buffer, NULL);
10428 }
10429 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010430}
10431
10432/**
Owen Taylor3473f882001-02-23 17:55:21 +000010433 * xmlParseChunk:
10434 * @ctxt: an XML parser context
10435 * @chunk: a char array
10436 * @size: the size in bytes of the chunk
10437 * @terminate: last chunk indicator
10438 *
10439 * Parse a Chunk of memory
10440 *
10441 * Returns zero if no error, the xmlParserErrors otherwise.
10442 */
10443int
10444xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10445 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010446 int end_in_lf = 0;
10447
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010448 if (ctxt == NULL)
10449 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010450 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010451 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010452 if (ctxt->instate == XML_PARSER_START)
10453 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010454 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10455 (chunk[size - 1] == '\r')) {
10456 end_in_lf = 1;
10457 size--;
10458 }
Owen Taylor3473f882001-02-23 17:55:21 +000010459 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10460 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10461 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10462 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010463 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010464
William M. Bracka3215c72004-07-31 16:24:01 +000010465 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10466 if (res < 0) {
10467 ctxt->errNo = XML_PARSER_EOF;
10468 ctxt->disableSAX = 1;
10469 return (XML_PARSER_EOF);
10470 }
Owen Taylor3473f882001-02-23 17:55:21 +000010471 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10472 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010473 ctxt->input->end =
10474 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010475#ifdef DEBUG_PUSH
10476 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10477#endif
10478
Owen Taylor3473f882001-02-23 17:55:21 +000010479 } else if (ctxt->instate != XML_PARSER_EOF) {
10480 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10481 xmlParserInputBufferPtr in = ctxt->input->buf;
10482 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10483 (in->raw != NULL)) {
10484 int nbchars;
10485
10486 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10487 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010488 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010489 xmlGenericError(xmlGenericErrorContext,
10490 "xmlParseChunk: encoder error\n");
10491 return(XML_ERR_INVALID_ENCODING);
10492 }
10493 }
10494 }
10495 }
10496 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010497 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10498 (ctxt->input->buf != NULL)) {
10499 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10500 }
Daniel Veillard14412512005-01-21 23:53:26 +000010501 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010502 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010503 if (terminate) {
10504 /*
10505 * Check for termination
10506 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010507 int avail = 0;
10508
10509 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010510 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010511 avail = ctxt->input->length -
10512 (ctxt->input->cur - ctxt->input->base);
10513 else
10514 avail = ctxt->input->buf->buffer->use -
10515 (ctxt->input->cur - ctxt->input->base);
10516 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010517
Owen Taylor3473f882001-02-23 17:55:21 +000010518 if ((ctxt->instate != XML_PARSER_EOF) &&
10519 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010520 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010521 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010522 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010523 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010524 }
Owen Taylor3473f882001-02-23 17:55:21 +000010525 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010526 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010527 ctxt->sax->endDocument(ctxt->userData);
10528 }
10529 ctxt->instate = XML_PARSER_EOF;
10530 }
10531 return((xmlParserErrors) ctxt->errNo);
10532}
10533
10534/************************************************************************
10535 * *
10536 * I/O front end functions to the parser *
10537 * *
10538 ************************************************************************/
10539
10540/**
Owen Taylor3473f882001-02-23 17:55:21 +000010541 * xmlCreatePushParserCtxt:
10542 * @sax: a SAX handler
10543 * @user_data: The user data returned on SAX callbacks
10544 * @chunk: a pointer to an array of chars
10545 * @size: number of chars in the array
10546 * @filename: an optional file name or URI
10547 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010548 * Create a parser context for using the XML parser in push mode.
10549 * If @buffer and @size are non-NULL, the data is used to detect
10550 * the encoding. The remaining characters will be parsed so they
10551 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010552 * To allow content encoding detection, @size should be >= 4
10553 * The value of @filename is used for fetching external entities
10554 * and error/warning reports.
10555 *
10556 * Returns the new parser context or NULL
10557 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010558
Owen Taylor3473f882001-02-23 17:55:21 +000010559xmlParserCtxtPtr
10560xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10561 const char *chunk, int size, const char *filename) {
10562 xmlParserCtxtPtr ctxt;
10563 xmlParserInputPtr inputStream;
10564 xmlParserInputBufferPtr buf;
10565 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10566
10567 /*
10568 * plug some encoding conversion routines
10569 */
10570 if ((chunk != NULL) && (size >= 4))
10571 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10572
10573 buf = xmlAllocParserInputBuffer(enc);
10574 if (buf == NULL) return(NULL);
10575
10576 ctxt = xmlNewParserCtxt();
10577 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010578 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010579 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010580 return(NULL);
10581 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010582 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010583 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10584 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010585 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010586 xmlFreeParserInputBuffer(buf);
10587 xmlFreeParserCtxt(ctxt);
10588 return(NULL);
10589 }
Owen Taylor3473f882001-02-23 17:55:21 +000010590 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010591#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010592 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010593#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010594 xmlFree(ctxt->sax);
10595 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10596 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010597 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010598 xmlFreeParserInputBuffer(buf);
10599 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010600 return(NULL);
10601 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010602 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10603 if (sax->initialized == XML_SAX2_MAGIC)
10604 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10605 else
10606 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010607 if (user_data != NULL)
10608 ctxt->userData = user_data;
10609 }
10610 if (filename == NULL) {
10611 ctxt->directory = NULL;
10612 } else {
10613 ctxt->directory = xmlParserGetDirectory(filename);
10614 }
10615
10616 inputStream = xmlNewInputStream(ctxt);
10617 if (inputStream == NULL) {
10618 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010619 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010620 return(NULL);
10621 }
10622
10623 if (filename == NULL)
10624 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010625 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010626 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010627 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010628 if (inputStream->filename == NULL) {
10629 xmlFreeParserCtxt(ctxt);
10630 xmlFreeParserInputBuffer(buf);
10631 return(NULL);
10632 }
10633 }
Owen Taylor3473f882001-02-23 17:55:21 +000010634 inputStream->buf = buf;
10635 inputStream->base = inputStream->buf->buffer->content;
10636 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010637 inputStream->end =
10638 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010639
10640 inputPush(ctxt, inputStream);
10641
William M. Brack3a1cd212005-02-11 14:35:54 +000010642 /*
10643 * If the caller didn't provide an initial 'chunk' for determining
10644 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10645 * that it can be automatically determined later
10646 */
10647 if ((size == 0) || (chunk == NULL)) {
10648 ctxt->charset = XML_CHAR_ENCODING_NONE;
10649 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010650 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10651 int cur = ctxt->input->cur - ctxt->input->base;
10652
Owen Taylor3473f882001-02-23 17:55:21 +000010653 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010654
10655 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10656 ctxt->input->cur = ctxt->input->base + cur;
10657 ctxt->input->end =
10658 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010659#ifdef DEBUG_PUSH
10660 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10661#endif
10662 }
10663
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010664 if (enc != XML_CHAR_ENCODING_NONE) {
10665 xmlSwitchEncoding(ctxt, enc);
10666 }
10667
Owen Taylor3473f882001-02-23 17:55:21 +000010668 return(ctxt);
10669}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010670#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010671
10672/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010673 * xmlStopParser:
10674 * @ctxt: an XML parser context
10675 *
10676 * Blocks further parser processing
10677 */
10678void
10679xmlStopParser(xmlParserCtxtPtr ctxt) {
10680 if (ctxt == NULL)
10681 return;
10682 ctxt->instate = XML_PARSER_EOF;
10683 ctxt->disableSAX = 1;
10684 if (ctxt->input != NULL) {
10685 ctxt->input->cur = BAD_CAST"";
10686 ctxt->input->base = ctxt->input->cur;
10687 }
10688}
10689
10690/**
Owen Taylor3473f882001-02-23 17:55:21 +000010691 * xmlCreateIOParserCtxt:
10692 * @sax: a SAX handler
10693 * @user_data: The user data returned on SAX callbacks
10694 * @ioread: an I/O read function
10695 * @ioclose: an I/O close function
10696 * @ioctx: an I/O handler
10697 * @enc: the charset encoding if known
10698 *
10699 * Create a parser context for using the XML parser with an existing
10700 * I/O stream
10701 *
10702 * Returns the new parser context or NULL
10703 */
10704xmlParserCtxtPtr
10705xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10706 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10707 void *ioctx, xmlCharEncoding enc) {
10708 xmlParserCtxtPtr ctxt;
10709 xmlParserInputPtr inputStream;
10710 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010711
10712 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010713
10714 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10715 if (buf == NULL) return(NULL);
10716
10717 ctxt = xmlNewParserCtxt();
10718 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010719 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010720 return(NULL);
10721 }
10722 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010723#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010724 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010725#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010726 xmlFree(ctxt->sax);
10727 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10728 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010729 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010730 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010731 return(NULL);
10732 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010733 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10734 if (sax->initialized == XML_SAX2_MAGIC)
10735 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10736 else
10737 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010738 if (user_data != NULL)
10739 ctxt->userData = user_data;
10740 }
10741
10742 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10743 if (inputStream == NULL) {
10744 xmlFreeParserCtxt(ctxt);
10745 return(NULL);
10746 }
10747 inputPush(ctxt, inputStream);
10748
10749 return(ctxt);
10750}
10751
Daniel Veillard4432df22003-09-28 18:58:27 +000010752#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010753/************************************************************************
10754 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010755 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010756 * *
10757 ************************************************************************/
10758
10759/**
10760 * xmlIOParseDTD:
10761 * @sax: the SAX handler block or NULL
10762 * @input: an Input Buffer
10763 * @enc: the charset encoding if known
10764 *
10765 * Load and parse a DTD
10766 *
10767 * Returns the resulting xmlDtdPtr or NULL in case of error.
10768 * @input will be freed at parsing end.
10769 */
10770
10771xmlDtdPtr
10772xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10773 xmlCharEncoding enc) {
10774 xmlDtdPtr ret = NULL;
10775 xmlParserCtxtPtr ctxt;
10776 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010777 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010778
10779 if (input == NULL)
10780 return(NULL);
10781
10782 ctxt = xmlNewParserCtxt();
10783 if (ctxt == NULL) {
10784 return(NULL);
10785 }
10786
10787 /*
10788 * Set-up the SAX context
10789 */
10790 if (sax != NULL) {
10791 if (ctxt->sax != NULL)
10792 xmlFree(ctxt->sax);
10793 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010794 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010795 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010796 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010797
10798 /*
10799 * generate a parser input from the I/O handler
10800 */
10801
Daniel Veillard43caefb2003-12-07 19:32:22 +000010802 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010803 if (pinput == NULL) {
10804 if (sax != NULL) ctxt->sax = NULL;
10805 xmlFreeParserCtxt(ctxt);
10806 return(NULL);
10807 }
10808
10809 /*
10810 * plug some encoding conversion routines here.
10811 */
10812 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010813 if (enc != XML_CHAR_ENCODING_NONE) {
10814 xmlSwitchEncoding(ctxt, enc);
10815 }
Owen Taylor3473f882001-02-23 17:55:21 +000010816
10817 pinput->filename = NULL;
10818 pinput->line = 1;
10819 pinput->col = 1;
10820 pinput->base = ctxt->input->cur;
10821 pinput->cur = ctxt->input->cur;
10822 pinput->free = NULL;
10823
10824 /*
10825 * let's parse that entity knowing it's an external subset.
10826 */
10827 ctxt->inSubset = 2;
10828 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10829 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10830 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010831
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010832 if ((enc == XML_CHAR_ENCODING_NONE) &&
10833 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010834 /*
10835 * Get the 4 first bytes and decode the charset
10836 * if enc != XML_CHAR_ENCODING_NONE
10837 * plug some encoding conversion routines.
10838 */
10839 start[0] = RAW;
10840 start[1] = NXT(1);
10841 start[2] = NXT(2);
10842 start[3] = NXT(3);
10843 enc = xmlDetectCharEncoding(start, 4);
10844 if (enc != XML_CHAR_ENCODING_NONE) {
10845 xmlSwitchEncoding(ctxt, enc);
10846 }
10847 }
10848
Owen Taylor3473f882001-02-23 17:55:21 +000010849 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10850
10851 if (ctxt->myDoc != NULL) {
10852 if (ctxt->wellFormed) {
10853 ret = ctxt->myDoc->extSubset;
10854 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010855 if (ret != NULL) {
10856 xmlNodePtr tmp;
10857
10858 ret->doc = NULL;
10859 tmp = ret->children;
10860 while (tmp != NULL) {
10861 tmp->doc = NULL;
10862 tmp = tmp->next;
10863 }
10864 }
Owen Taylor3473f882001-02-23 17:55:21 +000010865 } else {
10866 ret = NULL;
10867 }
10868 xmlFreeDoc(ctxt->myDoc);
10869 ctxt->myDoc = NULL;
10870 }
10871 if (sax != NULL) ctxt->sax = NULL;
10872 xmlFreeParserCtxt(ctxt);
10873
10874 return(ret);
10875}
10876
10877/**
10878 * xmlSAXParseDTD:
10879 * @sax: the SAX handler block
10880 * @ExternalID: a NAME* containing the External ID of the DTD
10881 * @SystemID: a NAME* containing the URL to the DTD
10882 *
10883 * Load and parse an external subset.
10884 *
10885 * Returns the resulting xmlDtdPtr or NULL in case of error.
10886 */
10887
10888xmlDtdPtr
10889xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10890 const xmlChar *SystemID) {
10891 xmlDtdPtr ret = NULL;
10892 xmlParserCtxtPtr ctxt;
10893 xmlParserInputPtr input = NULL;
10894 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010895 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010896
10897 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10898
10899 ctxt = xmlNewParserCtxt();
10900 if (ctxt == NULL) {
10901 return(NULL);
10902 }
10903
10904 /*
10905 * Set-up the SAX context
10906 */
10907 if (sax != NULL) {
10908 if (ctxt->sax != NULL)
10909 xmlFree(ctxt->sax);
10910 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010911 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010912 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010913
10914 /*
10915 * Canonicalise the system ID
10916 */
10917 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010918 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010919 xmlFreeParserCtxt(ctxt);
10920 return(NULL);
10921 }
Owen Taylor3473f882001-02-23 17:55:21 +000010922
10923 /*
10924 * Ask the Entity resolver to load the damn thing
10925 */
10926
10927 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010928 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010929 if (input == NULL) {
10930 if (sax != NULL) ctxt->sax = NULL;
10931 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010932 if (systemIdCanonic != NULL)
10933 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010934 return(NULL);
10935 }
10936
10937 /*
10938 * plug some encoding conversion routines here.
10939 */
10940 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010941 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10942 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10943 xmlSwitchEncoding(ctxt, enc);
10944 }
Owen Taylor3473f882001-02-23 17:55:21 +000010945
10946 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010947 input->filename = (char *) systemIdCanonic;
10948 else
10949 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010950 input->line = 1;
10951 input->col = 1;
10952 input->base = ctxt->input->cur;
10953 input->cur = ctxt->input->cur;
10954 input->free = NULL;
10955
10956 /*
10957 * let's parse that entity knowing it's an external subset.
10958 */
10959 ctxt->inSubset = 2;
10960 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10961 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10962 ExternalID, SystemID);
10963 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10964
10965 if (ctxt->myDoc != NULL) {
10966 if (ctxt->wellFormed) {
10967 ret = ctxt->myDoc->extSubset;
10968 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010969 if (ret != NULL) {
10970 xmlNodePtr tmp;
10971
10972 ret->doc = NULL;
10973 tmp = ret->children;
10974 while (tmp != NULL) {
10975 tmp->doc = NULL;
10976 tmp = tmp->next;
10977 }
10978 }
Owen Taylor3473f882001-02-23 17:55:21 +000010979 } else {
10980 ret = NULL;
10981 }
10982 xmlFreeDoc(ctxt->myDoc);
10983 ctxt->myDoc = NULL;
10984 }
10985 if (sax != NULL) ctxt->sax = NULL;
10986 xmlFreeParserCtxt(ctxt);
10987
10988 return(ret);
10989}
10990
Daniel Veillard4432df22003-09-28 18:58:27 +000010991
Owen Taylor3473f882001-02-23 17:55:21 +000010992/**
10993 * xmlParseDTD:
10994 * @ExternalID: a NAME* containing the External ID of the DTD
10995 * @SystemID: a NAME* containing the URL to the DTD
10996 *
10997 * Load and parse an external subset.
10998 *
10999 * Returns the resulting xmlDtdPtr or NULL in case of error.
11000 */
11001
11002xmlDtdPtr
11003xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11004 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11005}
Daniel Veillard4432df22003-09-28 18:58:27 +000011006#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011007
11008/************************************************************************
11009 * *
11010 * Front ends when parsing an Entity *
11011 * *
11012 ************************************************************************/
11013
11014/**
Owen Taylor3473f882001-02-23 17:55:21 +000011015 * xmlParseCtxtExternalEntity:
11016 * @ctx: the existing parsing context
11017 * @URL: the URL for the entity to load
11018 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011019 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011020 *
11021 * Parse an external general entity within an existing parsing context
11022 * An external general parsed entity is well-formed if it matches the
11023 * production labeled extParsedEnt.
11024 *
11025 * [78] extParsedEnt ::= TextDecl? content
11026 *
11027 * Returns 0 if the entity is well formed, -1 in case of args problem and
11028 * the parser error code otherwise
11029 */
11030
11031int
11032xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011033 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011034 xmlParserCtxtPtr ctxt;
11035 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011036 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011037 xmlSAXHandlerPtr oldsax = NULL;
11038 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011039 xmlChar start[4];
11040 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011041
Daniel Veillardce682bc2004-11-05 17:22:25 +000011042 if (ctx == NULL) return(-1);
11043
Owen Taylor3473f882001-02-23 17:55:21 +000011044 if (ctx->depth > 40) {
11045 return(XML_ERR_ENTITY_LOOP);
11046 }
11047
Daniel Veillardcda96922001-08-21 10:56:31 +000011048 if (lst != NULL)
11049 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011050 if ((URL == NULL) && (ID == NULL))
11051 return(-1);
11052 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11053 return(-1);
11054
11055
11056 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11057 if (ctxt == NULL) return(-1);
11058 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011059 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011060 oldsax = ctxt->sax;
11061 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011062 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011063 newDoc = xmlNewDoc(BAD_CAST "1.0");
11064 if (newDoc == NULL) {
11065 xmlFreeParserCtxt(ctxt);
11066 return(-1);
11067 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011068 if (ctx->myDoc->dict) {
11069 newDoc->dict = ctx->myDoc->dict;
11070 xmlDictReference(newDoc->dict);
11071 }
Owen Taylor3473f882001-02-23 17:55:21 +000011072 if (ctx->myDoc != NULL) {
11073 newDoc->intSubset = ctx->myDoc->intSubset;
11074 newDoc->extSubset = ctx->myDoc->extSubset;
11075 }
11076 if (ctx->myDoc->URL != NULL) {
11077 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11078 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011079 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11080 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011081 ctxt->sax = oldsax;
11082 xmlFreeParserCtxt(ctxt);
11083 newDoc->intSubset = NULL;
11084 newDoc->extSubset = NULL;
11085 xmlFreeDoc(newDoc);
11086 return(-1);
11087 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011088 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011089 nodePush(ctxt, newDoc->children);
11090 if (ctx->myDoc == NULL) {
11091 ctxt->myDoc = newDoc;
11092 } else {
11093 ctxt->myDoc = ctx->myDoc;
11094 newDoc->children->doc = ctx->myDoc;
11095 }
11096
Daniel Veillard87a764e2001-06-20 17:41:10 +000011097 /*
11098 * Get the 4 first bytes and decode the charset
11099 * if enc != XML_CHAR_ENCODING_NONE
11100 * plug some encoding conversion routines.
11101 */
11102 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011103 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11104 start[0] = RAW;
11105 start[1] = NXT(1);
11106 start[2] = NXT(2);
11107 start[3] = NXT(3);
11108 enc = xmlDetectCharEncoding(start, 4);
11109 if (enc != XML_CHAR_ENCODING_NONE) {
11110 xmlSwitchEncoding(ctxt, enc);
11111 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011112 }
11113
Owen Taylor3473f882001-02-23 17:55:21 +000011114 /*
11115 * Parse a possible text declaration first
11116 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011117 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011118 xmlParseTextDecl(ctxt);
11119 }
11120
11121 /*
11122 * Doing validity checking on chunk doesn't make sense
11123 */
11124 ctxt->instate = XML_PARSER_CONTENT;
11125 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011126 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011127 ctxt->loadsubset = ctx->loadsubset;
11128 ctxt->depth = ctx->depth + 1;
11129 ctxt->replaceEntities = ctx->replaceEntities;
11130 if (ctxt->validate) {
11131 ctxt->vctxt.error = ctx->vctxt.error;
11132 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011133 } else {
11134 ctxt->vctxt.error = NULL;
11135 ctxt->vctxt.warning = NULL;
11136 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011137 ctxt->vctxt.nodeTab = NULL;
11138 ctxt->vctxt.nodeNr = 0;
11139 ctxt->vctxt.nodeMax = 0;
11140 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011141 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11142 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011143 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11144 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11145 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011146 ctxt->dictNames = ctx->dictNames;
11147 ctxt->attsDefault = ctx->attsDefault;
11148 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011149 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011150
11151 xmlParseContent(ctxt);
11152
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011153 ctx->validate = ctxt->validate;
11154 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011155 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011156 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011157 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011158 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011159 }
11160 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011161 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011162 }
11163
11164 if (!ctxt->wellFormed) {
11165 if (ctxt->errNo == 0)
11166 ret = 1;
11167 else
11168 ret = ctxt->errNo;
11169 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011170 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011171 xmlNodePtr cur;
11172
11173 /*
11174 * Return the newly created nodeset after unlinking it from
11175 * they pseudo parent.
11176 */
11177 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011178 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011179 while (cur != NULL) {
11180 cur->parent = NULL;
11181 cur = cur->next;
11182 }
11183 newDoc->children->children = NULL;
11184 }
11185 ret = 0;
11186 }
11187 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011188 ctxt->dict = NULL;
11189 ctxt->attsDefault = NULL;
11190 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011191 xmlFreeParserCtxt(ctxt);
11192 newDoc->intSubset = NULL;
11193 newDoc->extSubset = NULL;
11194 xmlFreeDoc(newDoc);
11195
11196 return(ret);
11197}
11198
11199/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011200 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011201 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011202 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011203 * @sax: the SAX handler bloc (possibly NULL)
11204 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11205 * @depth: Used for loop detection, use 0
11206 * @URL: the URL for the entity to load
11207 * @ID: the System ID for the entity to load
11208 * @list: the return value for the set of parsed nodes
11209 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011210 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011211 *
11212 * Returns 0 if the entity is well formed, -1 in case of args problem and
11213 * the parser error code otherwise
11214 */
11215
Daniel Veillard7d515752003-09-26 19:12:37 +000011216static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011217xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11218 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011219 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011220 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011221 xmlParserCtxtPtr ctxt;
11222 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011223 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011224 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011225 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011226 xmlChar start[4];
11227 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011228
11229 if (depth > 40) {
11230 return(XML_ERR_ENTITY_LOOP);
11231 }
11232
11233
11234
11235 if (list != NULL)
11236 *list = NULL;
11237 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011238 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011239 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011240 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011241
11242
11243 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011244 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011245 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011246 if (oldctxt != NULL) {
11247 ctxt->_private = oldctxt->_private;
11248 ctxt->loadsubset = oldctxt->loadsubset;
11249 ctxt->validate = oldctxt->validate;
11250 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011251 ctxt->record_info = oldctxt->record_info;
11252 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11253 ctxt->node_seq.length = oldctxt->node_seq.length;
11254 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011255 } else {
11256 /*
11257 * Doing validity checking on chunk without context
11258 * doesn't make sense
11259 */
11260 ctxt->_private = NULL;
11261 ctxt->validate = 0;
11262 ctxt->external = 2;
11263 ctxt->loadsubset = 0;
11264 }
Owen Taylor3473f882001-02-23 17:55:21 +000011265 if (sax != NULL) {
11266 oldsax = ctxt->sax;
11267 ctxt->sax = sax;
11268 if (user_data != NULL)
11269 ctxt->userData = user_data;
11270 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011271 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011272 newDoc = xmlNewDoc(BAD_CAST "1.0");
11273 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011274 ctxt->node_seq.maximum = 0;
11275 ctxt->node_seq.length = 0;
11276 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011277 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011278 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011279 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011280 newDoc->intSubset = doc->intSubset;
11281 newDoc->extSubset = doc->extSubset;
11282 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011283 xmlDictReference(newDoc->dict);
11284
Owen Taylor3473f882001-02-23 17:55:21 +000011285 if (doc->URL != NULL) {
11286 newDoc->URL = xmlStrdup(doc->URL);
11287 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011288 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11289 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011290 if (sax != NULL)
11291 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011292 ctxt->node_seq.maximum = 0;
11293 ctxt->node_seq.length = 0;
11294 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011295 xmlFreeParserCtxt(ctxt);
11296 newDoc->intSubset = NULL;
11297 newDoc->extSubset = NULL;
11298 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011299 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011300 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011301 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011302 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011303 ctxt->myDoc = doc;
11304 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011305
Daniel Veillard87a764e2001-06-20 17:41:10 +000011306 /*
11307 * Get the 4 first bytes and decode the charset
11308 * if enc != XML_CHAR_ENCODING_NONE
11309 * plug some encoding conversion routines.
11310 */
11311 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011312 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11313 start[0] = RAW;
11314 start[1] = NXT(1);
11315 start[2] = NXT(2);
11316 start[3] = NXT(3);
11317 enc = xmlDetectCharEncoding(start, 4);
11318 if (enc != XML_CHAR_ENCODING_NONE) {
11319 xmlSwitchEncoding(ctxt, enc);
11320 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011321 }
11322
Owen Taylor3473f882001-02-23 17:55:21 +000011323 /*
11324 * Parse a possible text declaration first
11325 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011326 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011327 xmlParseTextDecl(ctxt);
11328 }
11329
Owen Taylor3473f882001-02-23 17:55:21 +000011330 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011331 ctxt->depth = depth;
11332
11333 xmlParseContent(ctxt);
11334
Daniel Veillard561b7f82002-03-20 21:55:57 +000011335 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011336 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011337 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011338 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011339 }
11340 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011341 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011342 }
11343
11344 if (!ctxt->wellFormed) {
11345 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011346 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011347 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011348 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011349 } else {
11350 if (list != NULL) {
11351 xmlNodePtr cur;
11352
11353 /*
11354 * Return the newly created nodeset after unlinking it from
11355 * they pseudo parent.
11356 */
11357 cur = newDoc->children->children;
11358 *list = cur;
11359 while (cur != NULL) {
11360 cur->parent = NULL;
11361 cur = cur->next;
11362 }
11363 newDoc->children->children = NULL;
11364 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011365 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011366 }
11367 if (sax != NULL)
11368 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011369 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11370 oldctxt->node_seq.length = ctxt->node_seq.length;
11371 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011372 ctxt->node_seq.maximum = 0;
11373 ctxt->node_seq.length = 0;
11374 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011375 xmlFreeParserCtxt(ctxt);
11376 newDoc->intSubset = NULL;
11377 newDoc->extSubset = NULL;
11378 xmlFreeDoc(newDoc);
11379
11380 return(ret);
11381}
11382
Daniel Veillard81273902003-09-30 00:43:48 +000011383#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011384/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011385 * xmlParseExternalEntity:
11386 * @doc: the document the chunk pertains to
11387 * @sax: the SAX handler bloc (possibly NULL)
11388 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11389 * @depth: Used for loop detection, use 0
11390 * @URL: the URL for the entity to load
11391 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011392 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011393 *
11394 * Parse an external general entity
11395 * An external general parsed entity is well-formed if it matches the
11396 * production labeled extParsedEnt.
11397 *
11398 * [78] extParsedEnt ::= TextDecl? content
11399 *
11400 * Returns 0 if the entity is well formed, -1 in case of args problem and
11401 * the parser error code otherwise
11402 */
11403
11404int
11405xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011406 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011407 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011408 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011409}
11410
11411/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011412 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011413 * @doc: the document the chunk pertains to
11414 * @sax: the SAX handler bloc (possibly NULL)
11415 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11416 * @depth: Used for loop detection, use 0
11417 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011418 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011419 *
11420 * Parse a well-balanced chunk of an XML document
11421 * called by the parser
11422 * The allowed sequence for the Well Balanced Chunk is the one defined by
11423 * the content production in the XML grammar:
11424 *
11425 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11426 *
11427 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11428 * the parser error code otherwise
11429 */
11430
11431int
11432xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011433 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011434 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11435 depth, string, lst, 0 );
11436}
Daniel Veillard81273902003-09-30 00:43:48 +000011437#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011438
11439/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011440 * xmlParseBalancedChunkMemoryInternal:
11441 * @oldctxt: the existing parsing context
11442 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11443 * @user_data: the user data field for the parser context
11444 * @lst: the return value for the set of parsed nodes
11445 *
11446 *
11447 * Parse a well-balanced chunk of an XML document
11448 * called by the parser
11449 * The allowed sequence for the Well Balanced Chunk is the one defined by
11450 * the content production in the XML grammar:
11451 *
11452 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11453 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011454 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11455 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011456 *
11457 * In case recover is set to 1, the nodelist will not be empty even if
11458 * the parsed chunk is not well balanced.
11459 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011460static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011461xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11462 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11463 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011464 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011465 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011466 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011467 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011468 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011469 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011470 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011471
11472 if (oldctxt->depth > 40) {
11473 return(XML_ERR_ENTITY_LOOP);
11474 }
11475
11476
11477 if (lst != NULL)
11478 *lst = NULL;
11479 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011480 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011481
11482 size = xmlStrlen(string);
11483
11484 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011485 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011486 if (user_data != NULL)
11487 ctxt->userData = user_data;
11488 else
11489 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011490 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11491 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011492 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11493 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11494 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011495
11496 oldsax = ctxt->sax;
11497 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011498 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011499 ctxt->replaceEntities = oldctxt->replaceEntities;
11500 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011501
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011502 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011503 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011504 newDoc = xmlNewDoc(BAD_CAST "1.0");
11505 if (newDoc == NULL) {
11506 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011507 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011508 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011509 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011510 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011511 newDoc->dict = ctxt->dict;
11512 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011513 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011514 } else {
11515 ctxt->myDoc = oldctxt->myDoc;
11516 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011517 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011518 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011519 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11520 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011521 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011522 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011523 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011524 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011525 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011526 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011527 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011528 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011529 ctxt->myDoc->children = NULL;
11530 ctxt->myDoc->last = NULL;
11531 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011532 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011533 ctxt->instate = XML_PARSER_CONTENT;
11534 ctxt->depth = oldctxt->depth + 1;
11535
Daniel Veillard328f48c2002-11-15 15:24:34 +000011536 ctxt->validate = 0;
11537 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011538 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11539 /*
11540 * ID/IDREF registration will be done in xmlValidateElement below
11541 */
11542 ctxt->loadsubset |= XML_SKIP_IDS;
11543 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011544 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011545 ctxt->attsDefault = oldctxt->attsDefault;
11546 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011547
Daniel Veillard68e9e742002-11-16 15:35:11 +000011548 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011549 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011550 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011551 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011552 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011553 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011554 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011555 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011556 }
11557
11558 if (!ctxt->wellFormed) {
11559 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011560 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011561 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011562 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011563 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011564 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011565 }
11566
William M. Brack7b9154b2003-09-27 19:23:50 +000011567 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011568 xmlNodePtr cur;
11569
11570 /*
11571 * Return the newly created nodeset after unlinking it from
11572 * they pseudo parent.
11573 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011574 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011575 *lst = cur;
11576 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011577#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011578 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11579 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11580 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011581 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11582 oldctxt->myDoc, cur);
11583 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011584#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011585 cur->parent = NULL;
11586 cur = cur->next;
11587 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011588 ctxt->myDoc->children->children = NULL;
11589 }
11590 if (ctxt->myDoc != NULL) {
11591 xmlFreeNode(ctxt->myDoc->children);
11592 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011593 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011594 }
11595
11596 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011597 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011598 ctxt->attsDefault = NULL;
11599 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011600 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011601 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011602 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011603 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011604
11605 return(ret);
11606}
11607
Daniel Veillard29b17482004-08-16 00:39:03 +000011608/**
11609 * xmlParseInNodeContext:
11610 * @node: the context node
11611 * @data: the input string
11612 * @datalen: the input string length in bytes
11613 * @options: a combination of xmlParserOption
11614 * @lst: the return value for the set of parsed nodes
11615 *
11616 * Parse a well-balanced chunk of an XML document
11617 * within the context (DTD, namespaces, etc ...) of the given node.
11618 *
11619 * The allowed sequence for the data is a Well Balanced Chunk defined by
11620 * the content production in the XML grammar:
11621 *
11622 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11623 *
11624 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11625 * error code otherwise
11626 */
11627xmlParserErrors
11628xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11629 int options, xmlNodePtr *lst) {
11630#ifdef SAX2
11631 xmlParserCtxtPtr ctxt;
11632 xmlDocPtr doc = NULL;
11633 xmlNodePtr fake, cur;
11634 int nsnr = 0;
11635
11636 xmlParserErrors ret = XML_ERR_OK;
11637
11638 /*
11639 * check all input parameters, grab the document
11640 */
11641 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11642 return(XML_ERR_INTERNAL_ERROR);
11643 switch (node->type) {
11644 case XML_ELEMENT_NODE:
11645 case XML_ATTRIBUTE_NODE:
11646 case XML_TEXT_NODE:
11647 case XML_CDATA_SECTION_NODE:
11648 case XML_ENTITY_REF_NODE:
11649 case XML_PI_NODE:
11650 case XML_COMMENT_NODE:
11651 case XML_DOCUMENT_NODE:
11652 case XML_HTML_DOCUMENT_NODE:
11653 break;
11654 default:
11655 return(XML_ERR_INTERNAL_ERROR);
11656
11657 }
11658 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11659 (node->type != XML_DOCUMENT_NODE) &&
11660 (node->type != XML_HTML_DOCUMENT_NODE))
11661 node = node->parent;
11662 if (node == NULL)
11663 return(XML_ERR_INTERNAL_ERROR);
11664 if (node->type == XML_ELEMENT_NODE)
11665 doc = node->doc;
11666 else
11667 doc = (xmlDocPtr) node;
11668 if (doc == NULL)
11669 return(XML_ERR_INTERNAL_ERROR);
11670
11671 /*
11672 * allocate a context and set-up everything not related to the
11673 * node position in the tree
11674 */
11675 if (doc->type == XML_DOCUMENT_NODE)
11676 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11677#ifdef LIBXML_HTML_ENABLED
11678 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11679 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11680#endif
11681 else
11682 return(XML_ERR_INTERNAL_ERROR);
11683
11684 if (ctxt == NULL)
11685 return(XML_ERR_NO_MEMORY);
11686 fake = xmlNewComment(NULL);
11687 if (fake == NULL) {
11688 xmlFreeParserCtxt(ctxt);
11689 return(XML_ERR_NO_MEMORY);
11690 }
11691 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011692
11693 /*
11694 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11695 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11696 * we must wait until the last moment to free the original one.
11697 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011698 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011699 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011700 xmlDictFree(ctxt->dict);
11701 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011702 } else
11703 options |= XML_PARSE_NODICT;
11704
11705 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011706 xmlDetectSAX2(ctxt);
11707 ctxt->myDoc = doc;
11708
11709 if (node->type == XML_ELEMENT_NODE) {
11710 nodePush(ctxt, node);
11711 /*
11712 * initialize the SAX2 namespaces stack
11713 */
11714 cur = node;
11715 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11716 xmlNsPtr ns = cur->nsDef;
11717 const xmlChar *iprefix, *ihref;
11718
11719 while (ns != NULL) {
11720 if (ctxt->dict) {
11721 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11722 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11723 } else {
11724 iprefix = ns->prefix;
11725 ihref = ns->href;
11726 }
11727
11728 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11729 nsPush(ctxt, iprefix, ihref);
11730 nsnr++;
11731 }
11732 ns = ns->next;
11733 }
11734 cur = cur->parent;
11735 }
11736 ctxt->instate = XML_PARSER_CONTENT;
11737 }
11738
11739 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11740 /*
11741 * ID/IDREF registration will be done in xmlValidateElement below
11742 */
11743 ctxt->loadsubset |= XML_SKIP_IDS;
11744 }
11745
Daniel Veillard499cc922006-01-18 17:22:35 +000011746#ifdef LIBXML_HTML_ENABLED
11747 if (doc->type == XML_HTML_DOCUMENT_NODE)
11748 __htmlParseContent(ctxt);
11749 else
11750#endif
11751 xmlParseContent(ctxt);
11752
Daniel Veillard29b17482004-08-16 00:39:03 +000011753 nsPop(ctxt, nsnr);
11754 if ((RAW == '<') && (NXT(1) == '/')) {
11755 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11756 } else if (RAW != 0) {
11757 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11758 }
11759 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11760 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11761 ctxt->wellFormed = 0;
11762 }
11763
11764 if (!ctxt->wellFormed) {
11765 if (ctxt->errNo == 0)
11766 ret = XML_ERR_INTERNAL_ERROR;
11767 else
11768 ret = (xmlParserErrors)ctxt->errNo;
11769 } else {
11770 ret = XML_ERR_OK;
11771 }
11772
11773 /*
11774 * Return the newly created nodeset after unlinking it from
11775 * the pseudo sibling.
11776 */
11777
11778 cur = fake->next;
11779 fake->next = NULL;
11780 node->last = fake;
11781
11782 if (cur != NULL) {
11783 cur->prev = NULL;
11784 }
11785
11786 *lst = cur;
11787
11788 while (cur != NULL) {
11789 cur->parent = NULL;
11790 cur = cur->next;
11791 }
11792
11793 xmlUnlinkNode(fake);
11794 xmlFreeNode(fake);
11795
11796
11797 if (ret != XML_ERR_OK) {
11798 xmlFreeNodeList(*lst);
11799 *lst = NULL;
11800 }
William M. Brackc3f81342004-10-03 01:22:44 +000011801
William M. Brackb7b54de2004-10-06 16:38:01 +000011802 if (doc->dict != NULL)
11803 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011804 xmlFreeParserCtxt(ctxt);
11805
11806 return(ret);
11807#else /* !SAX2 */
11808 return(XML_ERR_INTERNAL_ERROR);
11809#endif
11810}
11811
Daniel Veillard81273902003-09-30 00:43:48 +000011812#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011813/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011814 * xmlParseBalancedChunkMemoryRecover:
11815 * @doc: the document the chunk pertains to
11816 * @sax: the SAX handler bloc (possibly NULL)
11817 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11818 * @depth: Used for loop detection, use 0
11819 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11820 * @lst: the return value for the set of parsed nodes
11821 * @recover: return nodes even if the data is broken (use 0)
11822 *
11823 *
11824 * Parse a well-balanced chunk of an XML document
11825 * called by the parser
11826 * The allowed sequence for the Well Balanced Chunk is the one defined by
11827 * the content production in the XML grammar:
11828 *
11829 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11830 *
11831 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11832 * the parser error code otherwise
11833 *
11834 * In case recover is set to 1, the nodelist will not be empty even if
11835 * the parsed chunk is not well balanced.
11836 */
11837int
11838xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11839 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11840 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011841 xmlParserCtxtPtr ctxt;
11842 xmlDocPtr newDoc;
11843 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011844 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011845 int size;
11846 int ret = 0;
11847
11848 if (depth > 40) {
11849 return(XML_ERR_ENTITY_LOOP);
11850 }
11851
11852
Daniel Veillardcda96922001-08-21 10:56:31 +000011853 if (lst != NULL)
11854 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011855 if (string == NULL)
11856 return(-1);
11857
11858 size = xmlStrlen(string);
11859
11860 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11861 if (ctxt == NULL) return(-1);
11862 ctxt->userData = ctxt;
11863 if (sax != NULL) {
11864 oldsax = ctxt->sax;
11865 ctxt->sax = sax;
11866 if (user_data != NULL)
11867 ctxt->userData = user_data;
11868 }
11869 newDoc = xmlNewDoc(BAD_CAST "1.0");
11870 if (newDoc == NULL) {
11871 xmlFreeParserCtxt(ctxt);
11872 return(-1);
11873 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011874 if ((doc != NULL) && (doc->dict != NULL)) {
11875 xmlDictFree(ctxt->dict);
11876 ctxt->dict = doc->dict;
11877 xmlDictReference(ctxt->dict);
11878 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11879 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11880 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11881 ctxt->dictNames = 1;
11882 } else {
11883 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11884 }
Owen Taylor3473f882001-02-23 17:55:21 +000011885 if (doc != NULL) {
11886 newDoc->intSubset = doc->intSubset;
11887 newDoc->extSubset = doc->extSubset;
11888 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011889 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11890 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011891 if (sax != NULL)
11892 ctxt->sax = oldsax;
11893 xmlFreeParserCtxt(ctxt);
11894 newDoc->intSubset = NULL;
11895 newDoc->extSubset = NULL;
11896 xmlFreeDoc(newDoc);
11897 return(-1);
11898 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011899 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11900 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011901 if (doc == NULL) {
11902 ctxt->myDoc = newDoc;
11903 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011904 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011905 newDoc->children->doc = doc;
11906 }
11907 ctxt->instate = XML_PARSER_CONTENT;
11908 ctxt->depth = depth;
11909
11910 /*
11911 * Doing validity checking on chunk doesn't make sense
11912 */
11913 ctxt->validate = 0;
11914 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011915 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011916
Daniel Veillardb39bc392002-10-26 19:29:51 +000011917 if ( doc != NULL ){
11918 content = doc->children;
11919 doc->children = NULL;
11920 xmlParseContent(ctxt);
11921 doc->children = content;
11922 }
11923 else {
11924 xmlParseContent(ctxt);
11925 }
Owen Taylor3473f882001-02-23 17:55:21 +000011926 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011927 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011928 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011929 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011930 }
11931 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011932 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011933 }
11934
11935 if (!ctxt->wellFormed) {
11936 if (ctxt->errNo == 0)
11937 ret = 1;
11938 else
11939 ret = ctxt->errNo;
11940 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011941 ret = 0;
11942 }
11943
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011944 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11945 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011946
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011947 /*
11948 * Return the newly created nodeset after unlinking it from
11949 * they pseudo parent.
11950 */
11951 cur = newDoc->children->children;
11952 *lst = cur;
11953 while (cur != NULL) {
11954 xmlSetTreeDoc(cur, doc);
11955 cur->parent = NULL;
11956 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011957 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011958 newDoc->children->children = NULL;
11959 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011960
Owen Taylor3473f882001-02-23 17:55:21 +000011961 if (sax != NULL)
11962 ctxt->sax = oldsax;
11963 xmlFreeParserCtxt(ctxt);
11964 newDoc->intSubset = NULL;
11965 newDoc->extSubset = NULL;
11966 xmlFreeDoc(newDoc);
11967
11968 return(ret);
11969}
11970
11971/**
11972 * xmlSAXParseEntity:
11973 * @sax: the SAX handler block
11974 * @filename: the filename
11975 *
11976 * parse an XML external entity out of context and build a tree.
11977 * It use the given SAX function block to handle the parsing callback.
11978 * If sax is NULL, fallback to the default DOM tree building routines.
11979 *
11980 * [78] extParsedEnt ::= TextDecl? content
11981 *
11982 * This correspond to a "Well Balanced" chunk
11983 *
11984 * Returns the resulting document tree
11985 */
11986
11987xmlDocPtr
11988xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11989 xmlDocPtr ret;
11990 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011991
11992 ctxt = xmlCreateFileParserCtxt(filename);
11993 if (ctxt == NULL) {
11994 return(NULL);
11995 }
11996 if (sax != NULL) {
11997 if (ctxt->sax != NULL)
11998 xmlFree(ctxt->sax);
11999 ctxt->sax = sax;
12000 ctxt->userData = NULL;
12001 }
12002
Owen Taylor3473f882001-02-23 17:55:21 +000012003 xmlParseExtParsedEnt(ctxt);
12004
12005 if (ctxt->wellFormed)
12006 ret = ctxt->myDoc;
12007 else {
12008 ret = NULL;
12009 xmlFreeDoc(ctxt->myDoc);
12010 ctxt->myDoc = NULL;
12011 }
12012 if (sax != NULL)
12013 ctxt->sax = NULL;
12014 xmlFreeParserCtxt(ctxt);
12015
12016 return(ret);
12017}
12018
12019/**
12020 * xmlParseEntity:
12021 * @filename: the filename
12022 *
12023 * parse an XML external entity out of context and build a tree.
12024 *
12025 * [78] extParsedEnt ::= TextDecl? content
12026 *
12027 * This correspond to a "Well Balanced" chunk
12028 *
12029 * Returns the resulting document tree
12030 */
12031
12032xmlDocPtr
12033xmlParseEntity(const char *filename) {
12034 return(xmlSAXParseEntity(NULL, filename));
12035}
Daniel Veillard81273902003-09-30 00:43:48 +000012036#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012037
12038/**
12039 * xmlCreateEntityParserCtxt:
12040 * @URL: the entity URL
12041 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012042 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012043 *
12044 * Create a parser context for an external entity
12045 * Automatic support for ZLIB/Compress compressed document is provided
12046 * by default if found at compile-time.
12047 *
12048 * Returns the new parser context or NULL
12049 */
12050xmlParserCtxtPtr
12051xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12052 const xmlChar *base) {
12053 xmlParserCtxtPtr ctxt;
12054 xmlParserInputPtr inputStream;
12055 char *directory = NULL;
12056 xmlChar *uri;
12057
12058 ctxt = xmlNewParserCtxt();
12059 if (ctxt == NULL) {
12060 return(NULL);
12061 }
12062
12063 uri = xmlBuildURI(URL, base);
12064
12065 if (uri == NULL) {
12066 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12067 if (inputStream == NULL) {
12068 xmlFreeParserCtxt(ctxt);
12069 return(NULL);
12070 }
12071
12072 inputPush(ctxt, inputStream);
12073
12074 if ((ctxt->directory == NULL) && (directory == NULL))
12075 directory = xmlParserGetDirectory((char *)URL);
12076 if ((ctxt->directory == NULL) && (directory != NULL))
12077 ctxt->directory = directory;
12078 } else {
12079 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12080 if (inputStream == NULL) {
12081 xmlFree(uri);
12082 xmlFreeParserCtxt(ctxt);
12083 return(NULL);
12084 }
12085
12086 inputPush(ctxt, inputStream);
12087
12088 if ((ctxt->directory == NULL) && (directory == NULL))
12089 directory = xmlParserGetDirectory((char *)uri);
12090 if ((ctxt->directory == NULL) && (directory != NULL))
12091 ctxt->directory = directory;
12092 xmlFree(uri);
12093 }
Owen Taylor3473f882001-02-23 17:55:21 +000012094 return(ctxt);
12095}
12096
12097/************************************************************************
12098 * *
12099 * Front ends when parsing from a file *
12100 * *
12101 ************************************************************************/
12102
12103/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012104 * xmlCreateURLParserCtxt:
12105 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012106 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012107 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012108 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012109 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012110 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012111 *
12112 * Returns the new parser context or NULL
12113 */
12114xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012115xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012116{
12117 xmlParserCtxtPtr ctxt;
12118 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012119 char *directory = NULL;
12120
Owen Taylor3473f882001-02-23 17:55:21 +000012121 ctxt = xmlNewParserCtxt();
12122 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012123 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012124 return(NULL);
12125 }
12126
Daniel Veillarddf292f72005-01-16 19:00:15 +000012127 if (options)
12128 xmlCtxtUseOptions(ctxt, options);
12129 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012130
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012131 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012132 if (inputStream == NULL) {
12133 xmlFreeParserCtxt(ctxt);
12134 return(NULL);
12135 }
12136
Owen Taylor3473f882001-02-23 17:55:21 +000012137 inputPush(ctxt, inputStream);
12138 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012139 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012140 if ((ctxt->directory == NULL) && (directory != NULL))
12141 ctxt->directory = directory;
12142
12143 return(ctxt);
12144}
12145
Daniel Veillard61b93382003-11-03 14:28:31 +000012146/**
12147 * xmlCreateFileParserCtxt:
12148 * @filename: the filename
12149 *
12150 * Create a parser context for a file content.
12151 * Automatic support for ZLIB/Compress compressed document is provided
12152 * by default if found at compile-time.
12153 *
12154 * Returns the new parser context or NULL
12155 */
12156xmlParserCtxtPtr
12157xmlCreateFileParserCtxt(const char *filename)
12158{
12159 return(xmlCreateURLParserCtxt(filename, 0));
12160}
12161
Daniel Veillard81273902003-09-30 00:43:48 +000012162#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012163/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012164 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012165 * @sax: the SAX handler block
12166 * @filename: the filename
12167 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12168 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012169 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012170 *
12171 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12172 * compressed document is provided by default if found at compile-time.
12173 * It use the given SAX function block to handle the parsing callback.
12174 * If sax is NULL, fallback to the default DOM tree building routines.
12175 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012176 * User data (void *) is stored within the parser context in the
12177 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012178 *
Owen Taylor3473f882001-02-23 17:55:21 +000012179 * Returns the resulting document tree
12180 */
12181
12182xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012183xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12184 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012185 xmlDocPtr ret;
12186 xmlParserCtxtPtr ctxt;
12187 char *directory = NULL;
12188
Daniel Veillard635ef722001-10-29 11:48:19 +000012189 xmlInitParser();
12190
Owen Taylor3473f882001-02-23 17:55:21 +000012191 ctxt = xmlCreateFileParserCtxt(filename);
12192 if (ctxt == NULL) {
12193 return(NULL);
12194 }
12195 if (sax != NULL) {
12196 if (ctxt->sax != NULL)
12197 xmlFree(ctxt->sax);
12198 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012199 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012200 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012201 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012202 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012203 }
Owen Taylor3473f882001-02-23 17:55:21 +000012204
12205 if ((ctxt->directory == NULL) && (directory == NULL))
12206 directory = xmlParserGetDirectory(filename);
12207 if ((ctxt->directory == NULL) && (directory != NULL))
12208 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12209
Daniel Veillarddad3f682002-11-17 16:47:27 +000012210 ctxt->recovery = recovery;
12211
Owen Taylor3473f882001-02-23 17:55:21 +000012212 xmlParseDocument(ctxt);
12213
William M. Brackc07329e2003-09-08 01:57:30 +000012214 if ((ctxt->wellFormed) || recovery) {
12215 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012216 if (ret != NULL) {
12217 if (ctxt->input->buf->compressed > 0)
12218 ret->compression = 9;
12219 else
12220 ret->compression = ctxt->input->buf->compressed;
12221 }
William M. Brackc07329e2003-09-08 01:57:30 +000012222 }
Owen Taylor3473f882001-02-23 17:55:21 +000012223 else {
12224 ret = NULL;
12225 xmlFreeDoc(ctxt->myDoc);
12226 ctxt->myDoc = NULL;
12227 }
12228 if (sax != NULL)
12229 ctxt->sax = NULL;
12230 xmlFreeParserCtxt(ctxt);
12231
12232 return(ret);
12233}
12234
12235/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012236 * xmlSAXParseFile:
12237 * @sax: the SAX handler block
12238 * @filename: the filename
12239 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12240 * documents
12241 *
12242 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12243 * compressed document is provided by default if found at compile-time.
12244 * It use the given SAX function block to handle the parsing callback.
12245 * If sax is NULL, fallback to the default DOM tree building routines.
12246 *
12247 * Returns the resulting document tree
12248 */
12249
12250xmlDocPtr
12251xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12252 int recovery) {
12253 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12254}
12255
12256/**
Owen Taylor3473f882001-02-23 17:55:21 +000012257 * xmlRecoverDoc:
12258 * @cur: a pointer to an array of xmlChar
12259 *
12260 * parse an XML in-memory document and build a tree.
12261 * In the case the document is not Well Formed, a tree is built anyway
12262 *
12263 * Returns the resulting document tree
12264 */
12265
12266xmlDocPtr
12267xmlRecoverDoc(xmlChar *cur) {
12268 return(xmlSAXParseDoc(NULL, cur, 1));
12269}
12270
12271/**
12272 * xmlParseFile:
12273 * @filename: the filename
12274 *
12275 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12276 * compressed document is provided by default if found at compile-time.
12277 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012278 * Returns the resulting document tree if the file was wellformed,
12279 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012280 */
12281
12282xmlDocPtr
12283xmlParseFile(const char *filename) {
12284 return(xmlSAXParseFile(NULL, filename, 0));
12285}
12286
12287/**
12288 * xmlRecoverFile:
12289 * @filename: the filename
12290 *
12291 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12292 * compressed document is provided by default if found at compile-time.
12293 * In the case the document is not Well Formed, a tree is built anyway
12294 *
12295 * Returns the resulting document tree
12296 */
12297
12298xmlDocPtr
12299xmlRecoverFile(const char *filename) {
12300 return(xmlSAXParseFile(NULL, filename, 1));
12301}
12302
12303
12304/**
12305 * xmlSetupParserForBuffer:
12306 * @ctxt: an XML parser context
12307 * @buffer: a xmlChar * buffer
12308 * @filename: a file name
12309 *
12310 * Setup the parser context to parse a new buffer; Clears any prior
12311 * contents from the parser context. The buffer parameter must not be
12312 * NULL, but the filename parameter can be
12313 */
12314void
12315xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12316 const char* filename)
12317{
12318 xmlParserInputPtr input;
12319
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012320 if ((ctxt == NULL) || (buffer == NULL))
12321 return;
12322
Owen Taylor3473f882001-02-23 17:55:21 +000012323 input = xmlNewInputStream(ctxt);
12324 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012325 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012326 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012327 return;
12328 }
12329
12330 xmlClearParserCtxt(ctxt);
12331 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012332 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012333 input->base = buffer;
12334 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012335 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012336 inputPush(ctxt, input);
12337}
12338
12339/**
12340 * xmlSAXUserParseFile:
12341 * @sax: a SAX handler
12342 * @user_data: The user data returned on SAX callbacks
12343 * @filename: a file name
12344 *
12345 * parse an XML file and call the given SAX handler routines.
12346 * Automatic support for ZLIB/Compress compressed document is provided
12347 *
12348 * Returns 0 in case of success or a error number otherwise
12349 */
12350int
12351xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12352 const char *filename) {
12353 int ret = 0;
12354 xmlParserCtxtPtr ctxt;
12355
12356 ctxt = xmlCreateFileParserCtxt(filename);
12357 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012358#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012359 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012360#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012361 xmlFree(ctxt->sax);
12362 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012363 xmlDetectSAX2(ctxt);
12364
Owen Taylor3473f882001-02-23 17:55:21 +000012365 if (user_data != NULL)
12366 ctxt->userData = user_data;
12367
12368 xmlParseDocument(ctxt);
12369
12370 if (ctxt->wellFormed)
12371 ret = 0;
12372 else {
12373 if (ctxt->errNo != 0)
12374 ret = ctxt->errNo;
12375 else
12376 ret = -1;
12377 }
12378 if (sax != NULL)
12379 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012380 if (ctxt->myDoc != NULL) {
12381 xmlFreeDoc(ctxt->myDoc);
12382 ctxt->myDoc = NULL;
12383 }
Owen Taylor3473f882001-02-23 17:55:21 +000012384 xmlFreeParserCtxt(ctxt);
12385
12386 return ret;
12387}
Daniel Veillard81273902003-09-30 00:43:48 +000012388#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012389
12390/************************************************************************
12391 * *
12392 * Front ends when parsing from memory *
12393 * *
12394 ************************************************************************/
12395
12396/**
12397 * xmlCreateMemoryParserCtxt:
12398 * @buffer: a pointer to a char array
12399 * @size: the size of the array
12400 *
12401 * Create a parser context for an XML in-memory document.
12402 *
12403 * Returns the new parser context or NULL
12404 */
12405xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012406xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012407 xmlParserCtxtPtr ctxt;
12408 xmlParserInputPtr input;
12409 xmlParserInputBufferPtr buf;
12410
12411 if (buffer == NULL)
12412 return(NULL);
12413 if (size <= 0)
12414 return(NULL);
12415
12416 ctxt = xmlNewParserCtxt();
12417 if (ctxt == NULL)
12418 return(NULL);
12419
Daniel Veillard53350552003-09-18 13:35:51 +000012420 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012421 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012422 if (buf == NULL) {
12423 xmlFreeParserCtxt(ctxt);
12424 return(NULL);
12425 }
Owen Taylor3473f882001-02-23 17:55:21 +000012426
12427 input = xmlNewInputStream(ctxt);
12428 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012429 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012430 xmlFreeParserCtxt(ctxt);
12431 return(NULL);
12432 }
12433
12434 input->filename = NULL;
12435 input->buf = buf;
12436 input->base = input->buf->buffer->content;
12437 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012438 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012439
12440 inputPush(ctxt, input);
12441 return(ctxt);
12442}
12443
Daniel Veillard81273902003-09-30 00:43:48 +000012444#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012445/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012446 * xmlSAXParseMemoryWithData:
12447 * @sax: the SAX handler block
12448 * @buffer: an pointer to a char array
12449 * @size: the size of the array
12450 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12451 * documents
12452 * @data: the userdata
12453 *
12454 * parse an XML in-memory block and use the given SAX function block
12455 * to handle the parsing callback. If sax is NULL, fallback to the default
12456 * DOM tree building routines.
12457 *
12458 * User data (void *) is stored within the parser context in the
12459 * context's _private member, so it is available nearly everywhere in libxml
12460 *
12461 * Returns the resulting document tree
12462 */
12463
12464xmlDocPtr
12465xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12466 int size, int recovery, void *data) {
12467 xmlDocPtr ret;
12468 xmlParserCtxtPtr ctxt;
12469
12470 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12471 if (ctxt == NULL) return(NULL);
12472 if (sax != NULL) {
12473 if (ctxt->sax != NULL)
12474 xmlFree(ctxt->sax);
12475 ctxt->sax = sax;
12476 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012477 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012478 if (data!=NULL) {
12479 ctxt->_private=data;
12480 }
12481
Daniel Veillardadba5f12003-04-04 16:09:01 +000012482 ctxt->recovery = recovery;
12483
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012484 xmlParseDocument(ctxt);
12485
12486 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12487 else {
12488 ret = NULL;
12489 xmlFreeDoc(ctxt->myDoc);
12490 ctxt->myDoc = NULL;
12491 }
12492 if (sax != NULL)
12493 ctxt->sax = NULL;
12494 xmlFreeParserCtxt(ctxt);
12495
12496 return(ret);
12497}
12498
12499/**
Owen Taylor3473f882001-02-23 17:55:21 +000012500 * xmlSAXParseMemory:
12501 * @sax: the SAX handler block
12502 * @buffer: an pointer to a char array
12503 * @size: the size of the array
12504 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12505 * documents
12506 *
12507 * parse an XML in-memory block and use the given SAX function block
12508 * to handle the parsing callback. If sax is NULL, fallback to the default
12509 * DOM tree building routines.
12510 *
12511 * Returns the resulting document tree
12512 */
12513xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012514xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12515 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012516 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012517}
12518
12519/**
12520 * xmlParseMemory:
12521 * @buffer: an pointer to a char array
12522 * @size: the size of the array
12523 *
12524 * parse an XML in-memory block and build a tree.
12525 *
12526 * Returns the resulting document tree
12527 */
12528
Daniel Veillard50822cb2001-07-26 20:05:51 +000012529xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012530 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12531}
12532
12533/**
12534 * xmlRecoverMemory:
12535 * @buffer: an pointer to a char array
12536 * @size: the size of the array
12537 *
12538 * parse an XML in-memory block and build a tree.
12539 * In the case the document is not Well Formed, a tree is built anyway
12540 *
12541 * Returns the resulting document tree
12542 */
12543
Daniel Veillard50822cb2001-07-26 20:05:51 +000012544xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012545 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12546}
12547
12548/**
12549 * xmlSAXUserParseMemory:
12550 * @sax: a SAX handler
12551 * @user_data: The user data returned on SAX callbacks
12552 * @buffer: an in-memory XML document input
12553 * @size: the length of the XML document in bytes
12554 *
12555 * A better SAX parsing routine.
12556 * parse an XML in-memory buffer and call the given SAX handler routines.
12557 *
12558 * Returns 0 in case of success or a error number otherwise
12559 */
12560int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012561 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012562 int ret = 0;
12563 xmlParserCtxtPtr ctxt;
12564 xmlSAXHandlerPtr oldsax = NULL;
12565
Daniel Veillard9e923512002-08-14 08:48:52 +000012566 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012567 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12568 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012569 oldsax = ctxt->sax;
12570 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012571 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012572 if (user_data != NULL)
12573 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012574
12575 xmlParseDocument(ctxt);
12576
12577 if (ctxt->wellFormed)
12578 ret = 0;
12579 else {
12580 if (ctxt->errNo != 0)
12581 ret = ctxt->errNo;
12582 else
12583 ret = -1;
12584 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012585 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012586 if (ctxt->myDoc != NULL) {
12587 xmlFreeDoc(ctxt->myDoc);
12588 ctxt->myDoc = NULL;
12589 }
Owen Taylor3473f882001-02-23 17:55:21 +000012590 xmlFreeParserCtxt(ctxt);
12591
12592 return ret;
12593}
Daniel Veillard81273902003-09-30 00:43:48 +000012594#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012595
12596/**
12597 * xmlCreateDocParserCtxt:
12598 * @cur: a pointer to an array of xmlChar
12599 *
12600 * Creates a parser context for an XML in-memory document.
12601 *
12602 * Returns the new parser context or NULL
12603 */
12604xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012605xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012606 int len;
12607
12608 if (cur == NULL)
12609 return(NULL);
12610 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012611 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012612}
12613
Daniel Veillard81273902003-09-30 00:43:48 +000012614#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012615/**
12616 * xmlSAXParseDoc:
12617 * @sax: the SAX handler block
12618 * @cur: a pointer to an array of xmlChar
12619 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12620 * documents
12621 *
12622 * parse an XML in-memory document and build a tree.
12623 * It use the given SAX function block to handle the parsing callback.
12624 * If sax is NULL, fallback to the default DOM tree building routines.
12625 *
12626 * Returns the resulting document tree
12627 */
12628
12629xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012630xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012631 xmlDocPtr ret;
12632 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012633 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012634
Daniel Veillard38936062004-11-04 17:45:11 +000012635 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012636
12637
12638 ctxt = xmlCreateDocParserCtxt(cur);
12639 if (ctxt == NULL) return(NULL);
12640 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012641 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012642 ctxt->sax = sax;
12643 ctxt->userData = NULL;
12644 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012645 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012646
12647 xmlParseDocument(ctxt);
12648 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12649 else {
12650 ret = NULL;
12651 xmlFreeDoc(ctxt->myDoc);
12652 ctxt->myDoc = NULL;
12653 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012654 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012655 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012656 xmlFreeParserCtxt(ctxt);
12657
12658 return(ret);
12659}
12660
12661/**
12662 * xmlParseDoc:
12663 * @cur: a pointer to an array of xmlChar
12664 *
12665 * parse an XML in-memory document and build a tree.
12666 *
12667 * Returns the resulting document tree
12668 */
12669
12670xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012671xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012672 return(xmlSAXParseDoc(NULL, cur, 0));
12673}
Daniel Veillard81273902003-09-30 00:43:48 +000012674#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012675
Daniel Veillard81273902003-09-30 00:43:48 +000012676#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012677/************************************************************************
12678 * *
12679 * Specific function to keep track of entities references *
12680 * and used by the XSLT debugger *
12681 * *
12682 ************************************************************************/
12683
12684static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12685
12686/**
12687 * xmlAddEntityReference:
12688 * @ent : A valid entity
12689 * @firstNode : A valid first node for children of entity
12690 * @lastNode : A valid last node of children entity
12691 *
12692 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12693 */
12694static void
12695xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12696 xmlNodePtr lastNode)
12697{
12698 if (xmlEntityRefFunc != NULL) {
12699 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12700 }
12701}
12702
12703
12704/**
12705 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012706 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012707 *
12708 * Set the function to call call back when a xml reference has been made
12709 */
12710void
12711xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12712{
12713 xmlEntityRefFunc = func;
12714}
Daniel Veillard81273902003-09-30 00:43:48 +000012715#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012716
12717/************************************************************************
12718 * *
12719 * Miscellaneous *
12720 * *
12721 ************************************************************************/
12722
12723#ifdef LIBXML_XPATH_ENABLED
12724#include <libxml/xpath.h>
12725#endif
12726
Daniel Veillardffa3c742005-07-21 13:24:09 +000012727extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012728static int xmlParserInitialized = 0;
12729
12730/**
12731 * xmlInitParser:
12732 *
12733 * Initialization function for the XML parser.
12734 * This is not reentrant. Call once before processing in case of
12735 * use in multithreaded programs.
12736 */
12737
12738void
12739xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012740 if (xmlParserInitialized != 0)
12741 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012742
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012743 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12744 (xmlGenericError == NULL))
12745 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012746 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012747 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012748 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012749 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012750 xmlDefaultSAXHandlerInit();
12751 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012752#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012753 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012754#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012755#ifdef LIBXML_HTML_ENABLED
12756 htmlInitAutoClose();
12757 htmlDefaultSAXHandlerInit();
12758#endif
12759#ifdef LIBXML_XPATH_ENABLED
12760 xmlXPathInit();
12761#endif
12762 xmlParserInitialized = 1;
12763}
12764
12765/**
12766 * xmlCleanupParser:
12767 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012768 * Cleanup function for the XML library. It tries to reclaim all
12769 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012770 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012771 * function should not prevent reusing the library but one should
12772 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012773 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012774 */
12775
12776void
12777xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012778 if (!xmlParserInitialized)
12779 return;
12780
Owen Taylor3473f882001-02-23 17:55:21 +000012781 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012782#ifdef LIBXML_CATALOG_ENABLED
12783 xmlCatalogCleanup();
12784#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012785 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012786 xmlCleanupInputCallbacks();
12787#ifdef LIBXML_OUTPUT_ENABLED
12788 xmlCleanupOutputCallbacks();
12789#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012790#ifdef LIBXML_SCHEMAS_ENABLED
12791 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012792 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012793#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012794 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012795 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012796 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012797 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012798 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012799}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012800
12801/************************************************************************
12802 * *
12803 * New set (2.6.0) of simpler and more flexible APIs *
12804 * *
12805 ************************************************************************/
12806
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; it may be NULL, in which case the string is
 * always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can safely be used, followed by a semicolon,
 * as the body of an if/else.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
12818
12819/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012820 * xmlCtxtReset:
12821 * @ctxt: an XML parser context
12822 *
12823 * Reset a parser context
12824 */
12825void
12826xmlCtxtReset(xmlParserCtxtPtr ctxt)
12827{
12828 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012829 xmlDictPtr dict;
12830
12831 if (ctxt == NULL)
12832 return;
12833
12834 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012835
12836 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12837 xmlFreeInputStream(input);
12838 }
12839 ctxt->inputNr = 0;
12840 ctxt->input = NULL;
12841
12842 ctxt->spaceNr = 0;
12843 ctxt->spaceTab[0] = -1;
12844 ctxt->space = &ctxt->spaceTab[0];
12845
12846
12847 ctxt->nodeNr = 0;
12848 ctxt->node = NULL;
12849
12850 ctxt->nameNr = 0;
12851 ctxt->name = NULL;
12852
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012853 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012854 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012855 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012856 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012857 DICT_FREE(ctxt->directory);
12858 ctxt->directory = NULL;
12859 DICT_FREE(ctxt->extSubURI);
12860 ctxt->extSubURI = NULL;
12861 DICT_FREE(ctxt->extSubSystem);
12862 ctxt->extSubSystem = NULL;
12863 if (ctxt->myDoc != NULL)
12864 xmlFreeDoc(ctxt->myDoc);
12865 ctxt->myDoc = NULL;
12866
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012867 ctxt->standalone = -1;
12868 ctxt->hasExternalSubset = 0;
12869 ctxt->hasPErefs = 0;
12870 ctxt->html = 0;
12871 ctxt->external = 0;
12872 ctxt->instate = XML_PARSER_START;
12873 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012874
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012875 ctxt->wellFormed = 1;
12876 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012877 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012878 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012879#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012880 ctxt->vctxt.userData = ctxt;
12881 ctxt->vctxt.error = xmlParserValidityError;
12882 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012883#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012884 ctxt->record_info = 0;
12885 ctxt->nbChars = 0;
12886 ctxt->checkIndex = 0;
12887 ctxt->inSubset = 0;
12888 ctxt->errNo = XML_ERR_OK;
12889 ctxt->depth = 0;
12890 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12891 ctxt->catalogs = NULL;
12892 xmlInitNodeInfoSeq(&ctxt->node_seq);
12893
12894 if (ctxt->attsDefault != NULL) {
12895 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12896 ctxt->attsDefault = NULL;
12897 }
12898 if (ctxt->attsSpecial != NULL) {
12899 xmlHashFree(ctxt->attsSpecial, NULL);
12900 ctxt->attsSpecial = NULL;
12901 }
12902
Daniel Veillard4432df22003-09-28 18:58:27 +000012903#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012904 if (ctxt->catalogs != NULL)
12905 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012906#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012907 if (ctxt->lastError.code != XML_ERR_OK)
12908 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012909}
12910
12911/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012912 * xmlCtxtResetPush:
12913 * @ctxt: an XML parser context
12914 * @chunk: a pointer to an array of chars
12915 * @size: number of chars in the array
12916 * @filename: an optional file name or URI
12917 * @encoding: the document encoding, or NULL
12918 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012919 * Reset a push parser context
12920 *
12921 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012922 */
12923int
12924xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12925 int size, const char *filename, const char *encoding)
12926{
12927 xmlParserInputPtr inputStream;
12928 xmlParserInputBufferPtr buf;
12929 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12930
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012931 if (ctxt == NULL)
12932 return(1);
12933
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012934 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12935 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12936
12937 buf = xmlAllocParserInputBuffer(enc);
12938 if (buf == NULL)
12939 return(1);
12940
12941 if (ctxt == NULL) {
12942 xmlFreeParserInputBuffer(buf);
12943 return(1);
12944 }
12945
12946 xmlCtxtReset(ctxt);
12947
12948 if (ctxt->pushTab == NULL) {
12949 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12950 sizeof(xmlChar *));
12951 if (ctxt->pushTab == NULL) {
12952 xmlErrMemory(ctxt, NULL);
12953 xmlFreeParserInputBuffer(buf);
12954 return(1);
12955 }
12956 }
12957
12958 if (filename == NULL) {
12959 ctxt->directory = NULL;
12960 } else {
12961 ctxt->directory = xmlParserGetDirectory(filename);
12962 }
12963
12964 inputStream = xmlNewInputStream(ctxt);
12965 if (inputStream == NULL) {
12966 xmlFreeParserInputBuffer(buf);
12967 return(1);
12968 }
12969
12970 if (filename == NULL)
12971 inputStream->filename = NULL;
12972 else
12973 inputStream->filename = (char *)
12974 xmlCanonicPath((const xmlChar *) filename);
12975 inputStream->buf = buf;
12976 inputStream->base = inputStream->buf->buffer->content;
12977 inputStream->cur = inputStream->buf->buffer->content;
12978 inputStream->end =
12979 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12980
12981 inputPush(ctxt, inputStream);
12982
12983 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12984 (ctxt->input->buf != NULL)) {
12985 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12986 int cur = ctxt->input->cur - ctxt->input->base;
12987
12988 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12989
12990 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12991 ctxt->input->cur = ctxt->input->base + cur;
12992 ctxt->input->end =
12993 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12994 use];
12995#ifdef DEBUG_PUSH
12996 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12997#endif
12998 }
12999
13000 if (encoding != NULL) {
13001 xmlCharEncodingHandlerPtr hdlr;
13002
13003 hdlr = xmlFindCharEncodingHandler(encoding);
13004 if (hdlr != NULL) {
13005 xmlSwitchToEncoding(ctxt, hdlr);
13006 } else {
13007 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13008 "Unsupported encoding %s\n", BAD_CAST encoding);
13009 }
13010 } else if (enc != XML_CHAR_ENCODING_NONE) {
13011 xmlSwitchEncoding(ctxt, enc);
13012 }
13013
13014 return(0);
13015}
13016
13017/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013018 * xmlCtxtUseOptions:
13019 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013020 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013021 *
13022 * Applies the options to the parser context
13023 *
13024 * Returns 0 in case of success, the set of unknown or unimplemented options
13025 * in case of error.
13026 */
13027int
13028xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13029{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013030 if (ctxt == NULL)
13031 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013032 if (options & XML_PARSE_RECOVER) {
13033 ctxt->recovery = 1;
13034 options -= XML_PARSE_RECOVER;
13035 } else
13036 ctxt->recovery = 0;
13037 if (options & XML_PARSE_DTDLOAD) {
13038 ctxt->loadsubset = XML_DETECT_IDS;
13039 options -= XML_PARSE_DTDLOAD;
13040 } else
13041 ctxt->loadsubset = 0;
13042 if (options & XML_PARSE_DTDATTR) {
13043 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13044 options -= XML_PARSE_DTDATTR;
13045 }
13046 if (options & XML_PARSE_NOENT) {
13047 ctxt->replaceEntities = 1;
13048 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13049 options -= XML_PARSE_NOENT;
13050 } else
13051 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013052 if (options & XML_PARSE_PEDANTIC) {
13053 ctxt->pedantic = 1;
13054 options -= XML_PARSE_PEDANTIC;
13055 } else
13056 ctxt->pedantic = 0;
13057 if (options & XML_PARSE_NOBLANKS) {
13058 ctxt->keepBlanks = 0;
13059 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13060 options -= XML_PARSE_NOBLANKS;
13061 } else
13062 ctxt->keepBlanks = 1;
13063 if (options & XML_PARSE_DTDVALID) {
13064 ctxt->validate = 1;
13065 if (options & XML_PARSE_NOWARNING)
13066 ctxt->vctxt.warning = NULL;
13067 if (options & XML_PARSE_NOERROR)
13068 ctxt->vctxt.error = NULL;
13069 options -= XML_PARSE_DTDVALID;
13070 } else
13071 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013072 if (options & XML_PARSE_NOWARNING) {
13073 ctxt->sax->warning = NULL;
13074 options -= XML_PARSE_NOWARNING;
13075 }
13076 if (options & XML_PARSE_NOERROR) {
13077 ctxt->sax->error = NULL;
13078 ctxt->sax->fatalError = NULL;
13079 options -= XML_PARSE_NOERROR;
13080 }
Daniel Veillard81273902003-09-30 00:43:48 +000013081#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013082 if (options & XML_PARSE_SAX1) {
13083 ctxt->sax->startElement = xmlSAX2StartElement;
13084 ctxt->sax->endElement = xmlSAX2EndElement;
13085 ctxt->sax->startElementNs = NULL;
13086 ctxt->sax->endElementNs = NULL;
13087 ctxt->sax->initialized = 1;
13088 options -= XML_PARSE_SAX1;
13089 }
Daniel Veillard81273902003-09-30 00:43:48 +000013090#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013091 if (options & XML_PARSE_NODICT) {
13092 ctxt->dictNames = 0;
13093 options -= XML_PARSE_NODICT;
13094 } else {
13095 ctxt->dictNames = 1;
13096 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013097 if (options & XML_PARSE_NOCDATA) {
13098 ctxt->sax->cdataBlock = NULL;
13099 options -= XML_PARSE_NOCDATA;
13100 }
13101 if (options & XML_PARSE_NSCLEAN) {
13102 ctxt->options |= XML_PARSE_NSCLEAN;
13103 options -= XML_PARSE_NSCLEAN;
13104 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013105 if (options & XML_PARSE_NONET) {
13106 ctxt->options |= XML_PARSE_NONET;
13107 options -= XML_PARSE_NONET;
13108 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013109 if (options & XML_PARSE_COMPACT) {
13110 ctxt->options |= XML_PARSE_COMPACT;
13111 options -= XML_PARSE_COMPACT;
13112 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013113 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013114 return (options);
13115}
13116
13117/**
13118 * xmlDoRead:
13119 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013120 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013121 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013122 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013123 * @reuse: keep the context for reuse
13124 *
13125 * Common front-end for the xmlRead functions
13126 *
13127 * Returns the resulting document tree or NULL
13128 */
13129static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013130xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13131 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013132{
13133 xmlDocPtr ret;
13134
13135 xmlCtxtUseOptions(ctxt, options);
13136 if (encoding != NULL) {
13137 xmlCharEncodingHandlerPtr hdlr;
13138
13139 hdlr = xmlFindCharEncodingHandler(encoding);
13140 if (hdlr != NULL)
13141 xmlSwitchToEncoding(ctxt, hdlr);
13142 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013143 if ((URL != NULL) && (ctxt->input != NULL) &&
13144 (ctxt->input->filename == NULL))
13145 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013146 xmlParseDocument(ctxt);
13147 if ((ctxt->wellFormed) || ctxt->recovery)
13148 ret = ctxt->myDoc;
13149 else {
13150 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013151 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013152 xmlFreeDoc(ctxt->myDoc);
13153 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013154 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013155 ctxt->myDoc = NULL;
13156 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013157 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013158 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013159
13160 return (ret);
13161}
13162
13163/**
13164 * xmlReadDoc:
13165 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013166 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013167 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013168 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013169 *
13170 * parse an XML in-memory document and build a tree.
13171 *
13172 * Returns the resulting document tree
13173 */
13174xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013175xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013176{
13177 xmlParserCtxtPtr ctxt;
13178
13179 if (cur == NULL)
13180 return (NULL);
13181
13182 ctxt = xmlCreateDocParserCtxt(cur);
13183 if (ctxt == NULL)
13184 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013185 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013186}
13187
13188/**
13189 * xmlReadFile:
13190 * @filename: a file or URL
13191 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013192 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013193 *
13194 * parse an XML file from the filesystem or the network.
13195 *
13196 * Returns the resulting document tree
13197 */
13198xmlDocPtr
13199xmlReadFile(const char *filename, const char *encoding, int options)
13200{
13201 xmlParserCtxtPtr ctxt;
13202
Daniel Veillard61b93382003-11-03 14:28:31 +000013203 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013204 if (ctxt == NULL)
13205 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013206 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013207}
13208
13209/**
13210 * xmlReadMemory:
13211 * @buffer: a pointer to a char array
13212 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013213 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013214 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013215 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013216 *
13217 * parse an XML in-memory document and build a tree.
13218 *
13219 * Returns the resulting document tree
13220 */
13221xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013222xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013223{
13224 xmlParserCtxtPtr ctxt;
13225
13226 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13227 if (ctxt == NULL)
13228 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013229 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013230}
13231
13232/**
13233 * xmlReadFd:
13234 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013235 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013236 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013237 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013238 *
13239 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013240 * NOTE that the file descriptor will not be closed when the
13241 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013242 *
13243 * Returns the resulting document tree
13244 */
13245xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013246xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013247{
13248 xmlParserCtxtPtr ctxt;
13249 xmlParserInputBufferPtr input;
13250 xmlParserInputPtr stream;
13251
13252 if (fd < 0)
13253 return (NULL);
13254
13255 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13256 if (input == NULL)
13257 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013258 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013259 ctxt = xmlNewParserCtxt();
13260 if (ctxt == NULL) {
13261 xmlFreeParserInputBuffer(input);
13262 return (NULL);
13263 }
13264 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13265 if (stream == NULL) {
13266 xmlFreeParserInputBuffer(input);
13267 xmlFreeParserCtxt(ctxt);
13268 return (NULL);
13269 }
13270 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013271 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013272}
13273
13274/**
13275 * xmlReadIO:
13276 * @ioread: an I/O read function
13277 * @ioclose: an I/O close function
13278 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013279 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013281 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013282 *
13283 * parse an XML document from I/O functions and source and build a tree.
13284 *
13285 * Returns the resulting document tree
13286 */
13287xmlDocPtr
13288xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013289 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013290{
13291 xmlParserCtxtPtr ctxt;
13292 xmlParserInputBufferPtr input;
13293 xmlParserInputPtr stream;
13294
13295 if (ioread == NULL)
13296 return (NULL);
13297
13298 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13299 XML_CHAR_ENCODING_NONE);
13300 if (input == NULL)
13301 return (NULL);
13302 ctxt = xmlNewParserCtxt();
13303 if (ctxt == NULL) {
13304 xmlFreeParserInputBuffer(input);
13305 return (NULL);
13306 }
13307 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13308 if (stream == NULL) {
13309 xmlFreeParserInputBuffer(input);
13310 xmlFreeParserCtxt(ctxt);
13311 return (NULL);
13312 }
13313 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013314 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013315}
13316
13317/**
13318 * xmlCtxtReadDoc:
13319 * @ctxt: an XML parser context
13320 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013321 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013322 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013323 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013324 *
13325 * parse an XML in-memory document and build a tree.
13326 * This reuses the existing @ctxt parser context
13327 *
13328 * Returns the resulting document tree
13329 */
13330xmlDocPtr
13331xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013332 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013333{
13334 xmlParserInputPtr stream;
13335
13336 if (cur == NULL)
13337 return (NULL);
13338 if (ctxt == NULL)
13339 return (NULL);
13340
13341 xmlCtxtReset(ctxt);
13342
13343 stream = xmlNewStringInputStream(ctxt, cur);
13344 if (stream == NULL) {
13345 return (NULL);
13346 }
13347 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013348 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013349}
13350
13351/**
13352 * xmlCtxtReadFile:
13353 * @ctxt: an XML parser context
13354 * @filename: a file or URL
13355 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013356 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013357 *
13358 * parse an XML file from the filesystem or the network.
13359 * This reuses the existing @ctxt parser context
13360 *
13361 * Returns the resulting document tree
13362 */
13363xmlDocPtr
13364xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13365 const char *encoding, int options)
13366{
13367 xmlParserInputPtr stream;
13368
13369 if (filename == NULL)
13370 return (NULL);
13371 if (ctxt == NULL)
13372 return (NULL);
13373
13374 xmlCtxtReset(ctxt);
13375
Daniel Veillard29614c72004-11-26 10:47:26 +000013376 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013377 if (stream == NULL) {
13378 return (NULL);
13379 }
13380 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013381 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013382}
13383
13384/**
13385 * xmlCtxtReadMemory:
13386 * @ctxt: an XML parser context
13387 * @buffer: a pointer to a char array
13388 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013389 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013390 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013391 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013392 *
13393 * parse an XML in-memory document and build a tree.
13394 * This reuses the existing @ctxt parser context
13395 *
13396 * Returns the resulting document tree
13397 */
13398xmlDocPtr
13399xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013400 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013401{
13402 xmlParserInputBufferPtr input;
13403 xmlParserInputPtr stream;
13404
13405 if (ctxt == NULL)
13406 return (NULL);
13407 if (buffer == NULL)
13408 return (NULL);
13409
13410 xmlCtxtReset(ctxt);
13411
13412 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13413 if (input == NULL) {
13414 return(NULL);
13415 }
13416
13417 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13418 if (stream == NULL) {
13419 xmlFreeParserInputBuffer(input);
13420 return(NULL);
13421 }
13422
13423 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013424 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013425}
13426
13427/**
13428 * xmlCtxtReadFd:
13429 * @ctxt: an XML parser context
13430 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013431 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013432 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013433 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013434 *
13435 * parse an XML from a file descriptor and build a tree.
13436 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013437 * NOTE that the file descriptor will not be closed when the
13438 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013439 *
13440 * Returns the resulting document tree
13441 */
13442xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013443xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13444 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013445{
13446 xmlParserInputBufferPtr input;
13447 xmlParserInputPtr stream;
13448
13449 if (fd < 0)
13450 return (NULL);
13451 if (ctxt == NULL)
13452 return (NULL);
13453
13454 xmlCtxtReset(ctxt);
13455
13456
13457 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13458 if (input == NULL)
13459 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013460 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013461 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13462 if (stream == NULL) {
13463 xmlFreeParserInputBuffer(input);
13464 return (NULL);
13465 }
13466 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013467 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013468}
13469
13470/**
13471 * xmlCtxtReadIO:
13472 * @ctxt: an XML parser context
13473 * @ioread: an I/O read function
13474 * @ioclose: an I/O close function
13475 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013476 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013477 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013478 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013479 *
13480 * parse an XML document from I/O functions and source and build a tree.
13481 * This reuses the existing @ctxt parser context
13482 *
13483 * Returns the resulting document tree
13484 */
13485xmlDocPtr
13486xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13487 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013488 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013489 const char *encoding, int options)
13490{
13491 xmlParserInputBufferPtr input;
13492 xmlParserInputPtr stream;
13493
13494 if (ioread == NULL)
13495 return (NULL);
13496 if (ctxt == NULL)
13497 return (NULL);
13498
13499 xmlCtxtReset(ctxt);
13500
13501 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13502 XML_CHAR_ENCODING_NONE);
13503 if (input == NULL)
13504 return (NULL);
13505 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13506 if (stream == NULL) {
13507 xmlFreeParserInputBuffer(input);
13508 return (NULL);
13509 }
13510 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013511 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013512}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013513
13514#define bottom_parser
13515#include "elfgcchack.h"