blob: a79e27a71f9ca28fe7be3e72f3dd046ffbdd0700 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;

/* Compile-time switch; what it gates is not visible in this part of the
 * file -- TODO confirm against its users. */
#define SAX2 1

/* Buffer size constants; their users (and the unit, presumably a count of
 * characters) are outside this section -- verify before changing. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Fixed marker string, converted with BAD_CAST (presumably to xmlChar *). */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs
 * (the array is terminated by a NULL entry)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};


/*
 * Forward declarations. The definitions are not part of this section;
 * presumably they appear further down in the file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
 * Returns a non-zero value if the feature exists; zero (0) if the
 * feature does not exist or an unknown feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1462 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
1467 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1468 * This should be used internally by the parser
1469 * only to compare to ASCII values otherwise it would break when
1470 * running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
/*
 * Accessors on the current input of the parser context named `ctxt` at the
 * point of use.  RAW and CUR both read the xmlChar under the cursor,
 * NXT(val) reads the one `val` positions after it, and CUR_PTR is the
 * cursor pointer itself (usable as an lvalue).
 */
#define RAW		(ctxt->input->cur[0])
#define CUR		(ctxt->input->cur[0])
#define NXT(val)	(ctxt->input->cur[(val)])
#define CUR_PTR		(ctxt->input->cur)
1496
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at `s`, read as
 * unsigned char, equal c1..cn in order.  Evaluation stops at the first
 * mismatch.  `s` is expanded once per compared byte, so it must not have
 * side effects.  Every parameter is parenthesized so that a compound
 * argument (pointer arithmetic, a conditional expression, ...) is cast and
 * compared as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): advance the cursor of the current input by `val` xmlChars,
 * adding the same amount to the column and to ctxt->nbChars (no line
 * accounting is done).  Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and if the cursor sits on a 0 byte and the
 * input cannot be grown, the input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (ctxt->input->cur[0] == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((ctxt->input->cur[0] == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1522
/*
 * SKIPL(val): like SKIP(val) but walks the `val` xmlChars one at a time so
 * that a '\n' bumps the line counter and resets the column to 1, while any
 * other char bumps the column.  ctxt->nbChars grows by one per char.
 * Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and if the cursor sits on a 0 byte and the
 * input cannot be grown, the input is popped.
 * `val` is parenthesized so that any expression can be passed as the count.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the context `ctxt`. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the context `ctxt`. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and
 * ctxt->nbChars (no line accounting), then try to grow the input if the
 * cursor landed on a 0 byte.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	if (ctxt->input->cur[0] == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1572
/*
 * NEXTL(l): step over the current character, which occupies `l` xmlChars.
 * A '\n' under the cursor bumps the line counter and resets the column to
 * 1, anything else bumps the column by one.  After the move a '%' under
 * the cursor is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (ctxt->input->cur[0] != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += l;						\
    if (ctxt->input->cur[0] == '%')					\
	xmlParserHandlePEReference(ctxt);				\
  } while (0)
1580
/* CUR_CHAR(l): xmlCurrentChar() on the context, `l` receives the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string `s`, `l` receives the length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append value `v` to buffer `b` at index `i`.
 * When `l` is 1 the value is stored as a single xmlChar and `i` advances
 * by one; otherwise xmlCopyCharMultiByte() writes it and `i` advances by
 * the count that call returns.  The expansion is an unbraced if/else whose
 * final ';' is supplied by the caller.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates <buffer> to that many xmlChars.  On allocation failure
 * control jumps to the caller's `mem_error` label with <buffer> still
 * pointing to the old, valid block.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *)							\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (grown != NULL)							\
	buffer = grown;							\
    else								\
	goto mem_error;							\
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 /* nasty but well=formed
2496 if ((c == ':') && (*cur == 0)) {
2497 return(xmlStrdup(name));
2498 } */
2499
Owen Taylor3473f882001-02-23 17:55:21 +00002500 if (buffer == NULL)
2501 ret = xmlStrndup(buf, len);
2502 else {
2503 ret = buffer;
2504 buffer = NULL;
2505 max = XML_MAX_NAMELEN;
2506 }
2507
2508
2509 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002510 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002511 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002512 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002513 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 }
Owen Taylor3473f882001-02-23 17:55:21 +00002515 len = 0;
2516
Daniel Veillardbb284f42002-10-16 18:02:47 +00002517 /*
2518 * Check that the first character is proper to start
2519 * a new name
2520 */
2521 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2522 ((c >= 0x41) && (c <= 0x5A)) ||
2523 (c == '_') || (c == ':'))) {
2524 int l;
2525 int first = CUR_SCHAR(cur, l);
2526
2527 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002528 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002529 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 }
2532 }
2533 cur++;
2534
Owen Taylor3473f882001-02-23 17:55:21 +00002535 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2536 buf[len++] = c;
2537 c = *cur++;
2538 }
2539 if (len >= max) {
2540 /*
2541 * Okay someone managed to make a huge name, so he's ready to pay
2542 * for the processing speed.
2543 */
2544 max = len * 2;
2545
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002546 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002548 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002549 return(NULL);
2550 }
2551 memcpy(buffer, buf, len);
2552 while (c != 0) { /* tested bigname2.xml */
2553 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002554 xmlChar *tmp;
2555
Owen Taylor3473f882001-02-23 17:55:21 +00002556 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002557 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002560 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002562 return(NULL);
2563 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002564 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002565 }
2566 buffer[len++] = c;
2567 c = *cur++;
2568 }
2569 buffer[len] = 0;
2570 }
2571
2572 if (buffer == NULL)
2573 ret = xmlStrndup(buf, len);
2574 else {
2575 ret = buffer;
2576 }
2577 }
2578
2579 return(ret);
2580}
2581
2582/************************************************************************
2583 * *
2584 * The parser itself *
2585 * Relates to http://www.w3.org/TR/REC-xml *
2586 * *
2587 ************************************************************************/
2588
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002589static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002590static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002591 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002592
Owen Taylor3473f882001-02-23 17:55:21 +00002593/**
2594 * xmlParseName:
2595 * @ctxt: an XML parser context
2596 *
2597 * parse an XML name.
2598 *
2599 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2600 * CombiningChar | Extender
2601 *
2602 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2603 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002604 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002605 *
2606 * Returns the Name parsed or NULL
2607 */
2608
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002609const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002610xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002612 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002613 int count = 0;
2614
2615 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002616
2617 /*
2618 * Accelerator for simple ASCII names
2619 */
2620 in = ctxt->input->cur;
2621 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2622 ((*in >= 0x41) && (*in <= 0x5A)) ||
2623 (*in == '_') || (*in == ':')) {
2624 in++;
2625 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2626 ((*in >= 0x41) && (*in <= 0x5A)) ||
2627 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002628 (*in == '_') || (*in == '-') ||
2629 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002630 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002631 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002633 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002635 ctxt->nbChars += count;
2636 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002637 if (ret == NULL)
2638 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002639 return(ret);
2640 }
2641 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002642 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002643}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002644
Daniel Veillard46de64e2002-05-29 08:21:33 +00002645/**
2646 * xmlParseNameAndCompare:
2647 * @ctxt: an XML parser context
2648 *
2649 * parse an XML name and compares for match
2650 * (specialized for endtag parsing)
2651 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002652 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2653 * and the name for mismatch
2654 */
2655
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002656static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002657xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002658 register const xmlChar *cmp = other;
2659 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002660 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002661
2662 GROW;
2663
2664 in = ctxt->input->cur;
2665 while (*in != 0 && *in == *cmp) {
2666 ++in;
2667 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002668 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002669 }
William M. Brack76e95df2003-10-18 16:20:14 +00002670 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 /* success */
2672 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002673 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002674 }
2675 /* failure (or end of input buffer), check with full function */
2676 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002677 /* strings coming from the dictionnary direct compare possible */
2678 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002679 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002680 }
2681 return ret;
2682}
2683
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002685xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002686 int len = 0, l;
2687 int c;
2688 int count = 0;
2689
2690 /*
2691 * Handler for more complex cases
2692 */
2693 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002694 c = CUR_CHAR(l);
2695 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2696 (!IS_LETTER(c) && (c != '_') &&
2697 (c != ':'))) {
2698 return(NULL);
2699 }
2700
2701 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002702 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002703 (c == '.') || (c == '-') ||
2704 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002705 (IS_COMBINING(c)) ||
2706 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (count++ > 100) {
2708 count = 0;
2709 GROW;
2710 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002711 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002712 NEXTL(l);
2713 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002714 }
Daniel Veillard96688262005-08-23 18:14:12 +00002715 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2716 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002717 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002718}
2719
2720/**
2721 * xmlParseStringName:
2722 * @ctxt: an XML parser context
2723 * @str: a pointer to the string pointer (IN/OUT)
2724 *
2725 * parse an XML name.
2726 *
2727 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2728 * CombiningChar | Extender
2729 *
2730 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2731 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002732 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002733 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002734 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002735 * is updated to the current location in the string.
2736 */
2737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002738static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002739xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2740 xmlChar buf[XML_MAX_NAMELEN + 5];
2741 const xmlChar *cur = *str;
2742 int len = 0, l;
2743 int c;
2744
2745 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002746 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002747 (c != ':')) {
2748 return(NULL);
2749 }
2750
William M. Brack871611b2003-10-18 04:53:14 +00002751 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002752 (c == '.') || (c == '-') ||
2753 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002754 (IS_COMBINING(c)) ||
2755 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002756 COPY_BUF(l,buf,len,c);
2757 cur += l;
2758 c = CUR_SCHAR(cur, l);
2759 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2760 /*
2761 * Okay someone managed to make a huge name, so he's ready to pay
2762 * for the processing speed.
2763 */
2764 xmlChar *buffer;
2765 int max = len * 2;
2766
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002767 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002768 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002769 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002770 return(NULL);
2771 }
2772 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002773 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002774 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002775 (c == '.') || (c == '-') ||
2776 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002777 (IS_COMBINING(c)) ||
2778 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002779 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002780 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002781 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002785 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002787 return(NULL);
2788 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002789 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002790 }
2791 COPY_BUF(l,buffer,len,c);
2792 cur += l;
2793 c = CUR_SCHAR(cur, l);
2794 }
2795 buffer[len] = 0;
2796 *str = cur;
2797 return(buffer);
2798 }
2799 }
2800 *str = cur;
2801 return(xmlStrndup(buf, len));
2802}
2803
2804/**
2805 * xmlParseNmtoken:
2806 * @ctxt: an XML parser context
2807 *
2808 * parse an XML Nmtoken.
2809 *
2810 * [7] Nmtoken ::= (NameChar)+
2811 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002812 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002813 *
2814 * Returns the Nmtoken parsed or NULL
2815 */
2816
2817xmlChar *
2818xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2819 xmlChar buf[XML_MAX_NAMELEN + 5];
2820 int len = 0, l;
2821 int c;
2822 int count = 0;
2823
2824 GROW;
2825 c = CUR_CHAR(l);
2826
William M. Brack871611b2003-10-18 04:53:14 +00002827 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002828 (c == '.') || (c == '-') ||
2829 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002830 (IS_COMBINING(c)) ||
2831 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (count++ > 100) {
2833 count = 0;
2834 GROW;
2835 }
2836 COPY_BUF(l,buf,len,c);
2837 NEXTL(l);
2838 c = CUR_CHAR(l);
2839 if (len >= XML_MAX_NAMELEN) {
2840 /*
2841 * Okay someone managed to make a huge token, so he's ready to pay
2842 * for the processing speed.
2843 */
2844 xmlChar *buffer;
2845 int max = len * 2;
2846
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002847 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002848 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002849 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002850 return(NULL);
2851 }
2852 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002853 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002854 (c == '.') || (c == '-') ||
2855 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002856 (IS_COMBINING(c)) ||
2857 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002858 if (count++ > 100) {
2859 count = 0;
2860 GROW;
2861 }
2862 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002863 xmlChar *tmp;
2864
Owen Taylor3473f882001-02-23 17:55:21 +00002865 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002866 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002869 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002871 return(NULL);
2872 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002873 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002874 }
2875 COPY_BUF(l,buffer,len,c);
2876 NEXTL(l);
2877 c = CUR_CHAR(l);
2878 }
2879 buffer[len] = 0;
2880 return(buffer);
2881 }
2882 }
2883 if (len == 0)
2884 return(NULL);
2885 return(xmlStrndup(buf, len));
2886}
2887
2888/**
2889 * xmlParseEntityValue:
2890 * @ctxt: an XML parser context
2891 * @orig: if non-NULL store a copy of the original entity value
2892 *
2893 * parse a value for ENTITY declarations
2894 *
2895 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2896 * "'" ([^%&'] | PEReference | Reference)* "'"
2897 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002898 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002899 */
2900
2901xmlChar *
2902xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2903 xmlChar *buf = NULL;
2904 int len = 0;
2905 int size = XML_PARSER_BUFFER_SIZE;
2906 int c, l;
2907 xmlChar stop;
2908 xmlChar *ret = NULL;
2909 const xmlChar *cur = NULL;
2910 xmlParserInputPtr input;
2911
2912 if (RAW == '"') stop = '"';
2913 else if (RAW == '\'') stop = '\'';
2914 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002915 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002916 return(NULL);
2917 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002918 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002919 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002920 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002921 return(NULL);
2922 }
2923
2924 /*
2925 * The content of the entity definition is copied in a buffer.
2926 */
2927
2928 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2929 input = ctxt->input;
2930 GROW;
2931 NEXT;
2932 c = CUR_CHAR(l);
2933 /*
2934 * NOTE: 4.4.5 Included in Literal
2935 * When a parameter entity reference appears in a literal entity
2936 * value, ... a single or double quote character in the replacement
2937 * text is always treated as a normal data character and will not
2938 * terminate the literal.
2939 * In practice it means we stop the loop only when back at parsing
2940 * the initial entity and the quote is found
2941 */
William M. Brack871611b2003-10-18 04:53:14 +00002942 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002943 (ctxt->input != input))) {
2944 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002945 xmlChar *tmp;
2946
Owen Taylor3473f882001-02-23 17:55:21 +00002947 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002948 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2949 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002950 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002951 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002952 return(NULL);
2953 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002954 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002955 }
2956 COPY_BUF(l,buf,len,c);
2957 NEXTL(l);
2958 /*
2959 * Pop-up of finished entities.
2960 */
2961 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2962 xmlPopInput(ctxt);
2963
2964 GROW;
2965 c = CUR_CHAR(l);
2966 if (c == 0) {
2967 GROW;
2968 c = CUR_CHAR(l);
2969 }
2970 }
2971 buf[len] = 0;
2972
2973 /*
2974 * Raise problem w.r.t. '&' and '%' being used in non-entities
2975 * reference constructs. Note Charref will be handled in
2976 * xmlStringDecodeEntities()
2977 */
2978 cur = buf;
2979 while (*cur != 0) { /* non input consuming */
2980 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2981 xmlChar *name;
2982 xmlChar tmp = *cur;
2983
2984 cur++;
2985 name = xmlParseStringName(ctxt, &cur);
2986 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002987 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002988 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002990 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002991 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2992 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002993 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002994 }
2995 if (name != NULL)
2996 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002997 if (*cur == 0)
2998 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 cur++;
3001 }
3002
3003 /*
3004 * Then PEReference entities are substituted.
3005 */
3006 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003007 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003008 xmlFree(buf);
3009 } else {
3010 NEXT;
3011 /*
3012 * NOTE: 4.4.7 Bypassed
3013 * When a general entity reference appears in the EntityValue in
3014 * an entity declaration, it is bypassed and left as is.
3015 * so XML_SUBSTITUTE_REF is not set here.
3016 */
3017 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3018 0, 0, 0);
3019 if (orig != NULL)
3020 *orig = buf;
3021 else
3022 xmlFree(buf);
3023 }
3024
3025 return(ret);
3026}
3027
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  the resulting attribute len
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003040static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003041xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003042 xmlChar limit = 0;
3043 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003044 int len = 0;
3045 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003046 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003047 xmlChar *current = NULL;
3048 xmlEntityPtr ent;
3049
Owen Taylor3473f882001-02-23 17:55:21 +00003050 if (NXT(0) == '"') {
3051 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3052 limit = '"';
3053 NEXT;
3054 } else if (NXT(0) == '\'') {
3055 limit = '\'';
3056 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3057 NEXT;
3058 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003059 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003060 return(NULL);
3061 }
3062
3063 /*
3064 * allocate a translation buffer.
3065 */
3066 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003067 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003068 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003069
3070 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003071 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003072 */
3073 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003074 while ((NXT(0) != limit) && /* checked */
3075 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003076 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003077 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003078 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003079 if (NXT(1) == '#') {
3080 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003081
Owen Taylor3473f882001-02-23 17:55:21 +00003082 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003083 if (ctxt->replaceEntities) {
3084 if (len > buf_size - 10) {
3085 growBuffer(buf);
3086 }
3087 buf[len++] = '&';
3088 } else {
3089 /*
3090 * The reparsing will be done in xmlStringGetNodeList()
3091 * called by the attribute() function in SAX.c
3092 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003093 if (len > buf_size - 10) {
3094 growBuffer(buf);
3095 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003096 buf[len++] = '&';
3097 buf[len++] = '#';
3098 buf[len++] = '3';
3099 buf[len++] = '8';
3100 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003101 }
3102 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003103 if (len > buf_size - 10) {
3104 growBuffer(buf);
3105 }
Owen Taylor3473f882001-02-23 17:55:21 +00003106 len += xmlCopyChar(0, &buf[len], val);
3107 }
3108 } else {
3109 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003110 if ((ent != NULL) &&
3111 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3112 if (len > buf_size - 10) {
3113 growBuffer(buf);
3114 }
3115 if ((ctxt->replaceEntities == 0) &&
3116 (ent->content[0] == '&')) {
3117 buf[len++] = '&';
3118 buf[len++] = '#';
3119 buf[len++] = '3';
3120 buf[len++] = '8';
3121 buf[len++] = ';';
3122 } else {
3123 buf[len++] = ent->content[0];
3124 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003125 } else if ((ent != NULL) &&
3126 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003127 xmlChar *rep;
3128
3129 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3130 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003131 XML_SUBSTITUTE_REF,
3132 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (rep != NULL) {
3134 current = rep;
3135 while (*current != 0) { /* non input consuming */
3136 buf[len++] = *current++;
3137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
3140 }
3141 xmlFree(rep);
3142 }
3143 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003144 if (len > buf_size - 10) {
3145 growBuffer(buf);
3146 }
Owen Taylor3473f882001-02-23 17:55:21 +00003147 if (ent->content != NULL)
3148 buf[len++] = ent->content[0];
3149 }
3150 } else if (ent != NULL) {
3151 int i = xmlStrlen(ent->name);
3152 const xmlChar *cur = ent->name;
3153
3154 /*
3155 * This may look absurd but is needed to detect
3156 * entities problems
3157 */
3158 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3159 (ent->content != NULL)) {
3160 xmlChar *rep;
3161 rep = xmlStringDecodeEntities(ctxt, ent->content,
3162 XML_SUBSTITUTE_REF, 0, 0, 0);
3163 if (rep != NULL)
3164 xmlFree(rep);
3165 }
3166
3167 /*
3168 * Just output the reference
3169 */
3170 buf[len++] = '&';
3171 if (len > buf_size - i - 10) {
3172 growBuffer(buf);
3173 }
3174 for (;i > 0;i--)
3175 buf[len++] = *cur++;
3176 buf[len++] = ';';
3177 }
3178 }
3179 } else {
3180 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003181 if ((len != 0) || (!normalize)) {
3182 if ((!normalize) || (!in_space)) {
3183 COPY_BUF(l,buf,len,0x20);
3184 if (len > buf_size - 10) {
3185 growBuffer(buf);
3186 }
3187 }
3188 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003189 }
3190 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003191 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003192 COPY_BUF(l,buf,len,c);
3193 if (len > buf_size - 10) {
3194 growBuffer(buf);
3195 }
3196 }
3197 NEXTL(l);
3198 }
3199 GROW;
3200 c = CUR_CHAR(l);
3201 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003202 if ((in_space) && (normalize)) {
3203 while (buf[len - 1] == 0x20) len--;
3204 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003205 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003206 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003207 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003208 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003209 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3210 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003211 } else
3212 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003213 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003214 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003215
3216mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003217 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003218 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003219}
3220
3221/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003222 * xmlParseAttValue:
3223 * @ctxt: an XML parser context
3224 *
3225 * parse a value for an attribute
3226 * Note: the parser won't do substitution of entities here, this
3227 * will be handled later in xmlStringGetNodeList
3228 *
3229 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3230 * "'" ([^<&'] | Reference)* "'"
3231 *
3232 * 3.3.3 Attribute-Value Normalization:
3233 * Before the value of an attribute is passed to the application or
3234 * checked for validity, the XML processor must normalize it as follows:
3235 * - a character reference is processed by appending the referenced
3236 * character to the attribute value
3237 * - an entity reference is processed by recursively processing the
3238 * replacement text of the entity
3239 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3240 * appending #x20 to the normalized value, except that only a single
3241 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3242 * parsed entity or the literal entity value of an internal parsed entity
3243 * - other characters are processed by appending them to the normalized value
3244 * If the declared value is not CDATA, then the XML processor must further
3245 * process the normalized attribute value by discarding any leading and
3246 * trailing space (#x20) characters, and by replacing sequences of space
3247 * (#x20) characters by a single space (#x20) character.
3248 * All attributes for which no declaration has been read should be treated
3249 * by a non-validating parser as if declared CDATA.
3250 *
3251 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3252 */
3253
3254
3255xmlChar *
3256xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003257 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003258 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003259}
3260
3261/**
Owen Taylor3473f882001-02-23 17:55:21 +00003262 * xmlParseSystemLiteral:
3263 * @ctxt: an XML parser context
3264 *
3265 * parse an XML Literal
3266 *
3267 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3268 *
3269 * Returns the SystemLiteral parsed or NULL
3270 */
3271
3272xmlChar *
3273xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3274 xmlChar *buf = NULL;
3275 int len = 0;
3276 int size = XML_PARSER_BUFFER_SIZE;
3277 int cur, l;
3278 xmlChar stop;
3279 int state = ctxt->instate;
3280 int count = 0;
3281
3282 SHRINK;
3283 if (RAW == '"') {
3284 NEXT;
3285 stop = '"';
3286 } else if (RAW == '\'') {
3287 NEXT;
3288 stop = '\'';
3289 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003291 return(NULL);
3292 }
3293
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003294 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003295 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003296 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003297 return(NULL);
3298 }
3299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3300 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003302 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003303 xmlChar *tmp;
3304
Owen Taylor3473f882001-02-23 17:55:21 +00003305 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003306 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3307 if (tmp == NULL) {
3308 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003309 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003310 ctxt->instate = (xmlParserInputState) state;
3311 return(NULL);
3312 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003313 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 count++;
3316 if (count > 50) {
3317 GROW;
3318 count = 0;
3319 }
3320 COPY_BUF(l,buf,len,cur);
3321 NEXTL(l);
3322 cur = CUR_CHAR(l);
3323 if (cur == 0) {
3324 GROW;
3325 SHRINK;
3326 cur = CUR_CHAR(l);
3327 }
3328 }
3329 buf[len] = 0;
3330 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003331 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003332 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003333 } else {
3334 NEXT;
3335 }
3336 return(buf);
3337}
3338
3339/**
3340 * xmlParsePubidLiteral:
3341 * @ctxt: an XML parser context
3342 *
3343 * parse an XML public literal
3344 *
3345 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3346 *
3347 * Returns the PubidLiteral parsed or NULL.
3348 */
3349
3350xmlChar *
3351xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3352 xmlChar *buf = NULL;
3353 int len = 0;
3354 int size = XML_PARSER_BUFFER_SIZE;
3355 xmlChar cur;
3356 xmlChar stop;
3357 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003358 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003359
3360 SHRINK;
3361 if (RAW == '"') {
3362 NEXT;
3363 stop = '"';
3364 } else if (RAW == '\'') {
3365 NEXT;
3366 stop = '\'';
3367 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003368 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003369 return(NULL);
3370 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003371 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003373 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003374 return(NULL);
3375 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003376 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003377 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003378 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003379 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003380 xmlChar *tmp;
3381
Owen Taylor3473f882001-02-23 17:55:21 +00003382 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003383 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3384 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003385 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003386 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003387 return(NULL);
3388 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003389 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003390 }
3391 buf[len++] = cur;
3392 count++;
3393 if (count > 50) {
3394 GROW;
3395 count = 0;
3396 }
3397 NEXT;
3398 cur = CUR;
3399 if (cur == 0) {
3400 GROW;
3401 SHRINK;
3402 cur = CUR;
3403 }
3404 }
3405 buf[len] = 0;
3406 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003407 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003408 } else {
3409 NEXT;
3410 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003411 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003412 return(buf);
3413}
3414
Daniel Veillard48b2f892001-02-25 16:11:03 +00003415void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003416
/*
 * Lookup table driving the inner loop of the fast character data scan.
 * An entry is non-zero (equal to its own index) when the byte can be
 * skipped over directly; it is 0 for every byte that stops the scan:
 * control characters other than TAB (so CR and LF are handled apart),
 * '&', '<', ']' and all non-ASCII bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x08 */    0, 0x09,    0,    0,    0,    0,    0,    0, /* TAB only */
    /* 0x10 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x18 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,    0, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B,    0, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C,    0, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */    0,    0,    0,    0,    0,    0,    0,    0, /* non-ascii */
    /* 0x88 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x90 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x98 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xA0 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xA8 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xB0 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xB8 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xC0 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xC8 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xD0 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xD8 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xE0 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xE8 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xF0 */    0,    0,    0,    0,    0,    0,    0,    0,
    /* 0xF8 */    0,    0,    0,    0,    0,    0,    0,    0
};
3454
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the raw input bytes in place and
 * hands each run straight to the SAX callbacks without copying. As soon
 * as a byte the fast path does not handle is met (or when @cdata is
 * non-zero) the work is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;          /* scan pointer, runs ahead of input->cur */
    int nbchar = 0;             /* length of the run about to be delivered */
    /*
     * Position matching input->cur. The fast path updates input->line/col
     * while "in" runs ahead, so these are restored before falling back.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;                   /* local column counter for the table scan */

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* step over a run of spaces and line feeds */
            while (*in == 0x20) in++;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds were seen before the markup:
                 * deliver them (if any) and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * The handler tells both callbacks apart, let
                         * areBlanks() pick the one to use.
                         * NOTE(review): the last argument (1 here, 0 in
                         * the other call) presumably flags that the run
                         * is already known to hold only blanks - confirm
                         * against areBlanks().
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): -1 and -2 look like sentinel
                             * values of the space stack entry - confirm
                             * their meaning where ctxt->space is set.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* table-driven scan: advance while the byte needs no care */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed outside of a CDATA section */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the run [input->cur, in) gathered so far */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /* the run starts with a blank, it may be ignorable */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* the data is consumed, remember the new position */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: drop the CR by moving input->cur onto
                     * the LF, which becomes the start of the next run.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back, the loop condition below fails */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* refresh the input window and resume from the new cur */
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fall back to the generic routine, with line/col put back in sync
     * with input->cur.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
3607
Daniel Veillard01c13b52002-12-10 15:19:08 +00003608/**
3609 * xmlParseCharDataComplex:
3610 * @ctxt: an XML parser context
3611 * @cdata: int indicating whether we are within a CDATA section
3612 *
3613 * parse a CharData section.this is the fallback function
3614 * of xmlParseCharData() when the parsing requires handling
3615 * of non-ASCII characters.
3616 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003617void
3618xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003619 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3620 int nbchar = 0;
3621 int cur, l;
3622 int count = 0;
3623
3624 SHRINK;
3625 GROW;
3626 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003627 while ((cur != '<') && /* checked */
3628 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003629 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if ((cur == ']') && (NXT(1) == ']') &&
3631 (NXT(2) == '>')) {
3632 if (cdata) break;
3633 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003634 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003635 }
3636 }
3637 COPY_BUF(l,buf,nbchar,cur);
3638 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003639 buf[nbchar] = 0;
3640
Owen Taylor3473f882001-02-23 17:55:21 +00003641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003645 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003646 if (ctxt->sax->ignorableWhitespace != NULL)
3647 ctxt->sax->ignorableWhitespace(ctxt->userData,
3648 buf, nbchar);
3649 } else {
3650 if (ctxt->sax->characters != NULL)
3651 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003652 if ((ctxt->sax->characters !=
3653 ctxt->sax->ignorableWhitespace) &&
3654 (*ctxt->space == -1))
3655 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003656 }
3657 }
3658 nbchar = 0;
3659 }
3660 count++;
3661 if (count > 50) {
3662 GROW;
3663 count = 0;
3664 }
3665 NEXTL(l);
3666 cur = CUR_CHAR(l);
3667 }
3668 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003669 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003670 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003671 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003672 */
3673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003674 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003675 if (ctxt->sax->ignorableWhitespace != NULL)
3676 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3677 } else {
3678 if (ctxt->sax->characters != NULL)
3679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003680 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3681 (*ctxt->space == -1))
3682 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003683 }
3684 }
3685 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003686 if ((cur != 0) && (!IS_CHAR(cur))) {
3687 /* Generate the error and skip the offending character */
3688 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3689 "PCDATA invalid Char value %d\n",
3690 cur);
3691 NEXTL(l);
3692 }
Owen Taylor3473f882001-02-23 17:55:21 +00003693}
3694
3695/**
3696 * xmlParseExternalID:
3697 * @ctxt: an XML parser context
3698 * @publicID: a xmlChar** receiving PubidLiteral
3699 * @strict: indicate whether we should restrict parsing to only
3700 * production [75], see NOTE below
3701 *
3702 * Parse an External ID or a Public ID
3703 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003704 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003705 * 'PUBLIC' S PubidLiteral S SystemLiteral
3706 *
3707 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3708 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3709 *
3710 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3711 *
3712 * Returns the function returns SystemLiteral and in the second
3713 * case publicID receives PubidLiteral, is strict is off
3714 * it is possible to return NULL and have publicID set.
3715 */
3716
3717xmlChar *
3718xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3719 xmlChar *URI = NULL;
3720
3721 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003722
3723 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003724 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003725 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3728 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 SKIP_BLANKS;
3731 URI = xmlParseSystemLiteral(ctxt);
3732 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003733 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003734 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003735 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003736 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003737 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003738 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003739 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003740 }
3741 SKIP_BLANKS;
3742 *publicID = xmlParsePubidLiteral(ctxt);
3743 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003744 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 }
3746 if (strict) {
3747 /*
3748 * We don't handle [83] so "S SystemLiteral" is required.
3749 */
William M. Brack76e95df2003-10-18 16:20:14 +00003750 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003751 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003752 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003753 }
3754 } else {
3755 /*
3756 * We handle [83] so we return immediately, if
3757 * "S SystemLiteral" is not detected. From a purely parsing
3758 * point of view that's a nice mess.
3759 */
3760 const xmlChar *ptr;
3761 GROW;
3762
3763 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003764 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003765
William M. Brack76e95df2003-10-18 16:20:14 +00003766 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3768 }
3769 SKIP_BLANKS;
3770 URI = xmlParseSystemLiteral(ctxt);
3771 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003772 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003773 }
3774 }
3775 return(URI);
3776}
3777
3778/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003779 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003780 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * @buf: the already parsed part of the buffer
3782 * @len: number of bytes filles in the buffer
3783 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003784 *
3785 * Skip an XML (SGML) comment <!-- .... -->
3786 * The spec says that "For compatibility, the string "--" (double-hyphen)
3787 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003788 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003789 *
3790 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3791 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003792static void
3793xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003794 int q, ql;
3795 int r, rl;
3796 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 xmlParserInputPtr input = ctxt->input;
3798 int count = 0;
3799
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003801 len = 0;
3802 size = XML_PARSER_BUFFER_SIZE;
3803 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3804 if (buf == NULL) {
3805 xmlErrMemory(ctxt, NULL);
3806 return;
3807 }
Owen Taylor3473f882001-02-23 17:55:21 +00003808 }
3809 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003810 if (q == 0)
3811 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003812 NEXTL(ql);
3813 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003814 if (r == 0)
3815 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003816 NEXTL(rl);
3817 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003818 if (cur == 0)
3819 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003820 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003821 ((cur != '>') ||
3822 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003823 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003824 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003827 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003828 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003829 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3830 if (new_buf == NULL) {
3831 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003832 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003833 return;
3834 }
William M. Bracka3215c72004-07-31 16:24:01 +00003835 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003836 }
3837 COPY_BUF(ql,buf,len,q);
3838 q = r;
3839 ql = rl;
3840 r = cur;
3841 rl = l;
3842
3843 count++;
3844 if (count > 50) {
3845 GROW;
3846 count = 0;
3847 }
3848 NEXTL(l);
3849 cur = CUR_CHAR(l);
3850 if (cur == 0) {
3851 SHRINK;
3852 GROW;
3853 cur = CUR_CHAR(l);
3854 }
3855 }
3856 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003857 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003858 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003859 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003860 xmlFree(buf);
3861 } else {
3862 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003863 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3864 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
3866 NEXT;
3867 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3868 (!ctxt->disableSAX))
3869 ctxt->sax->comment(ctxt->userData, buf);
3870 xmlFree(buf);
3871 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003872 return;
3873not_terminated:
3874 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3875 "Comment not terminated\n", NULL);
3876 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003877}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003878/**
3879 * xmlParseComment:
3880 * @ctxt: an XML parser context
3881 *
3882 * Skip an XML (SGML) comment <!-- .... -->
3883 * The spec says that "For compatibility, the string "--" (double-hyphen)
3884 * must not occur within comments. "
3885 *
3886 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3887 */
3888void
3889xmlParseComment(xmlParserCtxtPtr ctxt) {
3890 xmlChar *buf = NULL;
3891 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003892 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003893 xmlParserInputState state;
3894 const xmlChar *in;
3895 int nbchar = 0, ccol;
3896
3897 /*
3898 * Check that there is a comment right here.
3899 */
3900 if ((RAW != '<') || (NXT(1) != '!') ||
3901 (NXT(2) != '-') || (NXT(3) != '-')) return;
3902
3903 state = ctxt->instate;
3904 ctxt->instate = XML_PARSER_COMMENT;
3905 SKIP(4);
3906 SHRINK;
3907 GROW;
3908
3909 /*
3910 * Accelerated common case where input don't need to be
3911 * modified before passing it to the handler.
3912 */
3913 in = ctxt->input->cur;
3914 do {
3915 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003916 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003917 ctxt->input->line++; ctxt->input->col = 1;
3918 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003919 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003920 }
3921get_more:
3922 ccol = ctxt->input->col;
3923 while (((*in > '-') && (*in <= 0x7F)) ||
3924 ((*in >= 0x20) && (*in < '-')) ||
3925 (*in == 0x09)) {
3926 in++;
3927 ccol++;
3928 }
3929 ctxt->input->col = ccol;
3930 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003931 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003932 ctxt->input->line++; ctxt->input->col = 1;
3933 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003934 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003935 goto get_more;
3936 }
3937 nbchar = in - ctxt->input->cur;
3938 /*
3939 * save current set of data
3940 */
3941 if (nbchar > 0) {
3942 if ((ctxt->sax != NULL) &&
3943 (ctxt->sax->comment != NULL)) {
3944 if (buf == NULL) {
3945 if ((*in == '-') && (in[1] == '-'))
3946 size = nbchar + 1;
3947 else
3948 size = XML_PARSER_BUFFER_SIZE + nbchar;
3949 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3950 if (buf == NULL) {
3951 xmlErrMemory(ctxt, NULL);
3952 ctxt->instate = state;
3953 return;
3954 }
3955 len = 0;
3956 } else if (len + nbchar + 1 >= size) {
3957 xmlChar *new_buf;
3958 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3959 new_buf = (xmlChar *) xmlRealloc(buf,
3960 size * sizeof(xmlChar));
3961 if (new_buf == NULL) {
3962 xmlFree (buf);
3963 xmlErrMemory(ctxt, NULL);
3964 ctxt->instate = state;
3965 return;
3966 }
3967 buf = new_buf;
3968 }
3969 memcpy(&buf[len], ctxt->input->cur, nbchar);
3970 len += nbchar;
3971 buf[len] = 0;
3972 }
3973 }
3974 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003975 if (*in == 0xA) {
3976 in++;
3977 ctxt->input->line++; ctxt->input->col = 1;
3978 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003979 if (*in == 0xD) {
3980 in++;
3981 if (*in == 0xA) {
3982 ctxt->input->cur = in;
3983 in++;
3984 ctxt->input->line++; ctxt->input->col = 1;
3985 continue; /* while */
3986 }
3987 in--;
3988 }
3989 SHRINK;
3990 GROW;
3991 in = ctxt->input->cur;
3992 if (*in == '-') {
3993 if (in[1] == '-') {
3994 if (in[2] == '>') {
3995 SKIP(3);
3996 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3997 (!ctxt->disableSAX)) {
3998 if (buf != NULL)
3999 ctxt->sax->comment(ctxt->userData, buf);
4000 else
4001 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4002 }
4003 if (buf != NULL)
4004 xmlFree(buf);
4005 ctxt->instate = state;
4006 return;
4007 }
4008 if (buf != NULL)
4009 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4010 "Comment not terminated \n<!--%.50s\n",
4011 buf);
4012 else
4013 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4014 "Comment not terminated \n", NULL);
4015 in++;
4016 ctxt->input->col++;
4017 }
4018 in++;
4019 ctxt->input->col++;
4020 goto get_more;
4021 }
4022 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4023 xmlParseCommentComplex(ctxt, buf, len, size);
4024 ctxt->instate = state;
4025 return;
4026}
4027
Owen Taylor3473f882001-02-23 17:55:21 +00004028
4029/**
4030 * xmlParsePITarget:
4031 * @ctxt: an XML parser context
4032 *
4033 * parse the name of a PI
4034 *
4035 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4036 *
4037 * Returns the PITarget name or NULL
4038 */
4039
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004040const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004041xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004042 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004043
4044 name = xmlParseName(ctxt);
4045 if ((name != NULL) &&
4046 ((name[0] == 'x') || (name[0] == 'X')) &&
4047 ((name[1] == 'm') || (name[1] == 'M')) &&
4048 ((name[2] == 'l') || (name[2] == 'L'))) {
4049 int i;
4050 if ((name[0] == 'x') && (name[1] == 'm') &&
4051 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004052 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004053 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004054 return(name);
4055 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004056 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004057 return(name);
4058 }
4059 for (i = 0;;i++) {
4060 if (xmlW3CPIs[i] == NULL) break;
4061 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4062 return(name);
4063 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004064 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4065 "xmlParsePITarget: invalid name prefix 'xml'\n",
4066 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004067 }
4068 return(name);
4069}
4070
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes, optional blanks around each token, nothing after the closing
 * quote). A syntax error raises an XML_WAR_CATALOG_PI warning, except
 * when the '=' sign is missing, in which case the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != '=') {
        return;
    }
    cursor++;

    /* opening quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    quote = *cursor;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    cursor++;

    /* quoted value, up to the matching quote */
    start = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4132
Owen Taylor3473f882001-02-23 17:55:21 +00004133/**
4134 * xmlParsePI:
4135 * @ctxt: an XML parser context
4136 *
4137 * parse an XML Processing Instruction.
4138 *
4139 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4140 *
4141 * The processing is transfered to SAX once parsed.
4142 */
4143
4144void
4145xmlParsePI(xmlParserCtxtPtr ctxt) {
4146 xmlChar *buf = NULL;
4147 int len = 0;
4148 int size = XML_PARSER_BUFFER_SIZE;
4149 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004150 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004151 xmlParserInputState state;
4152 int count = 0;
4153
4154 if ((RAW == '<') && (NXT(1) == '?')) {
4155 xmlParserInputPtr input = ctxt->input;
4156 state = ctxt->instate;
4157 ctxt->instate = XML_PARSER_PI;
4158 /*
4159 * this is a Processing Instruction.
4160 */
4161 SKIP(2);
4162 SHRINK;
4163
4164 /*
4165 * Parse the target name and check for special support like
4166 * namespace.
4167 */
4168 target = xmlParsePITarget(ctxt);
4169 if (target != NULL) {
4170 if ((RAW == '?') && (NXT(1) == '>')) {
4171 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004172 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4173 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004174 }
4175 SKIP(2);
4176
4177 /*
4178 * SAX: PI detected.
4179 */
4180 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4181 (ctxt->sax->processingInstruction != NULL))
4182 ctxt->sax->processingInstruction(ctxt->userData,
4183 target, NULL);
4184 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004185 return;
4186 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004187 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004188 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004189 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004190 ctxt->instate = state;
4191 return;
4192 }
4193 cur = CUR;
4194 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004195 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4196 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004197 }
4198 SKIP_BLANKS;
4199 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004200 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004201 ((cur != '?') || (NXT(1) != '>'))) {
4202 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004203 xmlChar *tmp;
4204
Owen Taylor3473f882001-02-23 17:55:21 +00004205 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004206 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4207 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004208 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004209 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004210 ctxt->instate = state;
4211 return;
4212 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004213 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 }
4215 count++;
4216 if (count > 50) {
4217 GROW;
4218 count = 0;
4219 }
4220 COPY_BUF(l,buf,len,cur);
4221 NEXTL(l);
4222 cur = CUR_CHAR(l);
4223 if (cur == 0) {
4224 SHRINK;
4225 GROW;
4226 cur = CUR_CHAR(l);
4227 }
4228 }
4229 buf[len] = 0;
4230 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004231 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4232 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004233 } else {
4234 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004235 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4236 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004237 }
4238 SKIP(2);
4239
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004240#ifdef LIBXML_CATALOG_ENABLED
4241 if (((state == XML_PARSER_MISC) ||
4242 (state == XML_PARSER_START)) &&
4243 (xmlStrEqual(target, XML_CATALOG_PI))) {
4244 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4245 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4246 (allow == XML_CATA_ALLOW_ALL))
4247 xmlParseCatalogPI(ctxt, buf);
4248 }
4249#endif
4250
4251
Owen Taylor3473f882001-02-23 17:55:21 +00004252 /*
4253 * SAX: PI detected.
4254 */
4255 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4256 (ctxt->sax->processingInstruction != NULL))
4257 ctxt->sax->processingInstruction(ctxt->userData,
4258 target, buf);
4259 }
4260 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004261 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004262 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 }
4264 ctxt->instate = state;
4265 }
4266}
4267
4268/**
4269 * xmlParseNotationDecl:
4270 * @ctxt: an XML parser context
4271 *
4272 * parse a notation declaration
4273 *
4274 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4275 *
4276 * Hence there is actually 3 choices:
4277 * 'PUBLIC' S PubidLiteral
4278 * 'PUBLIC' S PubidLiteral S SystemLiteral
4279 * and 'SYSTEM' S SystemLiteral
4280 *
4281 * See the NOTE on xmlParseExternalID().
4282 */
4283
4284void
4285xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004286 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004287 xmlChar *Pubid;
4288 xmlChar *Systemid;
4289
Daniel Veillarda07050d2003-10-19 14:46:32 +00004290 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004291 xmlParserInputPtr input = ctxt->input;
4292 SHRINK;
4293 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004294 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004295 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4296 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004297 return;
4298 }
4299 SKIP_BLANKS;
4300
Daniel Veillard76d66f42001-05-16 21:05:17 +00004301 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004302 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004303 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004304 return;
4305 }
William M. Brack76e95df2003-10-18 16:20:14 +00004306 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004308 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004309 return;
4310 }
4311 SKIP_BLANKS;
4312
4313 /*
4314 * Parse the IDs.
4315 */
4316 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4317 SKIP_BLANKS;
4318
4319 if (RAW == '>') {
4320 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004321 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4322 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
4324 NEXT;
4325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4326 (ctxt->sax->notationDecl != NULL))
4327 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004329 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
Owen Taylor3473f882001-02-23 17:55:21 +00004331 if (Systemid != NULL) xmlFree(Systemid);
4332 if (Pubid != NULL) xmlFree(Pubid);
4333 }
4334}
4335
4336/**
4337 * xmlParseEntityDecl:
4338 * @ctxt: an XML parser context
4339 *
4340 * parse <!ENTITY declarations
4341 *
4342 * [70] EntityDecl ::= GEDecl | PEDecl
4343 *
4344 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4345 *
4346 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4347 *
4348 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4349 *
4350 * [74] PEDef ::= EntityValue | ExternalID
4351 *
4352 * [76] NDataDecl ::= S 'NDATA' S Name
4353 *
4354 * [ VC: Notation Declared ]
4355 * The Name must match the declared name of a notation.
4356 */
4357
4358void
4359xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004360 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004361 xmlChar *value = NULL;
4362 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004363 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 int isParameter = 0;
4365 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004366 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004367
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004369 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004370 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 SHRINK;
4372 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004373 skipped = SKIP_BLANKS;
4374 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4376 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004377 }
Owen Taylor3473f882001-02-23 17:55:21 +00004378
4379 if (RAW == '%') {
4380 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004381 skipped = SKIP_BLANKS;
4382 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004383 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4384 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004385 }
Owen Taylor3473f882001-02-23 17:55:21 +00004386 isParameter = 1;
4387 }
4388
Daniel Veillard76d66f42001-05-16 21:05:17 +00004389 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004391 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4392 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004393 return;
4394 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004395 skipped = SKIP_BLANKS;
4396 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004397 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4398 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
Owen Taylor3473f882001-02-23 17:55:21 +00004400
Daniel Veillardf5582f12002-06-11 10:08:16 +00004401 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004402 /*
4403 * handle the various case of definitions...
4404 */
4405 if (isParameter) {
4406 if ((RAW == '"') || (RAW == '\'')) {
4407 value = xmlParseEntityValue(ctxt, &orig);
4408 if (value) {
4409 if ((ctxt->sax != NULL) &&
4410 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4411 ctxt->sax->entityDecl(ctxt->userData, name,
4412 XML_INTERNAL_PARAMETER_ENTITY,
4413 NULL, NULL, value);
4414 }
4415 } else {
4416 URI = xmlParseExternalID(ctxt, &literal, 1);
4417 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004418 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004419 }
4420 if (URI) {
4421 xmlURIPtr uri;
4422
4423 uri = xmlParseURI((const char *) URI);
4424 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004425 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4426 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004427 /*
4428 * This really ought to be a well formedness error
4429 * but the XML Core WG decided otherwise c.f. issue
4430 * E26 of the XML erratas.
4431 */
Owen Taylor3473f882001-02-23 17:55:21 +00004432 } else {
4433 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004434 /*
4435 * Okay this is foolish to block those but not
4436 * invalid URIs.
4437 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 } else {
4440 if ((ctxt->sax != NULL) &&
4441 (!ctxt->disableSAX) &&
4442 (ctxt->sax->entityDecl != NULL))
4443 ctxt->sax->entityDecl(ctxt->userData, name,
4444 XML_EXTERNAL_PARAMETER_ENTITY,
4445 literal, URI, NULL);
4446 }
4447 xmlFreeURI(uri);
4448 }
4449 }
4450 }
4451 } else {
4452 if ((RAW == '"') || (RAW == '\'')) {
4453 value = xmlParseEntityValue(ctxt, &orig);
4454 if ((ctxt->sax != NULL) &&
4455 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4456 ctxt->sax->entityDecl(ctxt->userData, name,
4457 XML_INTERNAL_GENERAL_ENTITY,
4458 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004459 /*
4460 * For expat compatibility in SAX mode.
4461 */
4462 if ((ctxt->myDoc == NULL) ||
4463 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4464 if (ctxt->myDoc == NULL) {
4465 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4466 }
4467 if (ctxt->myDoc->intSubset == NULL)
4468 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4469 BAD_CAST "fake", NULL, NULL);
4470
Daniel Veillard1af9a412003-08-20 22:54:39 +00004471 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4472 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004473 }
Owen Taylor3473f882001-02-23 17:55:21 +00004474 } else {
4475 URI = xmlParseExternalID(ctxt, &literal, 1);
4476 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004477 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004478 }
4479 if (URI) {
4480 xmlURIPtr uri;
4481
4482 uri = xmlParseURI((const char *)URI);
4483 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004484 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4485 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004486 /*
4487 * This really ought to be a well formedness error
4488 * but the XML Core WG decided otherwise c.f. issue
4489 * E26 of the XML erratas.
4490 */
Owen Taylor3473f882001-02-23 17:55:21 +00004491 } else {
4492 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004493 /*
4494 * Okay this is foolish to block those but not
4495 * invalid URIs.
4496 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004497 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004498 }
4499 xmlFreeURI(uri);
4500 }
4501 }
William M. Brack76e95df2003-10-18 16:20:14 +00004502 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4504 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004505 }
4506 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004507 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004508 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004509 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004510 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4511 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004512 }
4513 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004514 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004515 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4516 (ctxt->sax->unparsedEntityDecl != NULL))
4517 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4518 literal, URI, ndata);
4519 } else {
4520 if ((ctxt->sax != NULL) &&
4521 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4522 ctxt->sax->entityDecl(ctxt->userData, name,
4523 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4524 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004525 /*
4526 * For expat compatibility in SAX mode.
4527 * assuming the entity repalcement was asked for
4528 */
4529 if ((ctxt->replaceEntities != 0) &&
4530 ((ctxt->myDoc == NULL) ||
4531 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4532 if (ctxt->myDoc == NULL) {
4533 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4534 }
4535
4536 if (ctxt->myDoc->intSubset == NULL)
4537 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4538 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004539 xmlSAX2EntityDecl(ctxt, name,
4540 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4541 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004542 }
Owen Taylor3473f882001-02-23 17:55:21 +00004543 }
4544 }
4545 }
4546 SKIP_BLANKS;
4547 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004548 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004549 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004550 } else {
4551 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004552 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4553 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004554 }
4555 NEXT;
4556 }
4557 if (orig != NULL) {
4558 /*
4559 * Ugly mechanism to save the raw entity value.
4560 */
4561 xmlEntityPtr cur = NULL;
4562
4563 if (isParameter) {
4564 if ((ctxt->sax != NULL) &&
4565 (ctxt->sax->getParameterEntity != NULL))
4566 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4567 } else {
4568 if ((ctxt->sax != NULL) &&
4569 (ctxt->sax->getEntity != NULL))
4570 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004571 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004572 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004573 }
Owen Taylor3473f882001-02-23 17:55:21 +00004574 }
4575 if (cur != NULL) {
4576 if (cur->orig != NULL)
4577 xmlFree(orig);
4578 else
4579 cur->orig = orig;
4580 } else
4581 xmlFree(orig);
4582 }
Owen Taylor3473f882001-02-23 17:55:21 +00004583 if (value != NULL) xmlFree(value);
4584 if (URI != NULL) xmlFree(URI);
4585 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004586 }
4587}
4588
4589/**
4590 * xmlParseDefaultDecl:
4591 * @ctxt: an XML parser context
4592 * @value: Receive a possible fixed default value for the attribute
4593 *
4594 * Parse an attribute default declaration
4595 *
4596 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4597 *
4598 * [ VC: Required Attribute ]
4599 * if the default declaration is the keyword #REQUIRED, then the
4600 * attribute must be specified for all elements of the type in the
4601 * attribute-list declaration.
4602 *
4603 * [ VC: Attribute Default Legal ]
4604 * The declared default value must meet the lexical constraints of
4605 * the declared attribute type c.f. xmlValidateAttributeDecl()
4606 *
4607 * [ VC: Fixed Attribute Default ]
4608 * if an attribute has a default value declared with the #FIXED
4609 * keyword, instances of that attribute must match the default value.
4610 *
4611 * [ WFC: No < in Attribute Values ]
4612 * handled in xmlParseAttValue()
4613 *
4614 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4615 * or XML_ATTRIBUTE_FIXED.
4616 */
4617
4618int
4619xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4620 int val;
4621 xmlChar *ret;
4622
4623 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004624 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004625 SKIP(9);
4626 return(XML_ATTRIBUTE_REQUIRED);
4627 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004628 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004629 SKIP(8);
4630 return(XML_ATTRIBUTE_IMPLIED);
4631 }
4632 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004633 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004634 SKIP(6);
4635 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004636 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4638 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004639 }
4640 SKIP_BLANKS;
4641 }
4642 ret = xmlParseAttValue(ctxt);
4643 ctxt->instate = XML_PARSER_DTD;
4644 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004645 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004646 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004647 } else
4648 *value = ret;
4649 return(val);
4650}
4651
4652/**
4653 * xmlParseNotationType:
4654 * @ctxt: an XML parser context
4655 *
4656 * parse an Notation attribute type.
4657 *
4658 * Note: the leading 'NOTATION' S part has already being parsed...
4659 *
4660 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4661 *
4662 * [ VC: Notation Attributes ]
4663 * Values of this type must match one of the notation names included
4664 * in the declaration; all notation names in the declaration must be declared.
4665 *
4666 * Returns: the notation attribute tree built while parsing
4667 */
4668
4669xmlEnumerationPtr
4670xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004671 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004672 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4673
4674 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004675 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004676 return(NULL);
4677 }
4678 SHRINK;
4679 do {
4680 NEXT;
4681 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004682 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004684 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4685 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004686 return(ret);
4687 }
4688 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004689 if (cur == NULL) return(ret);
4690 if (last == NULL) ret = last = cur;
4691 else {
4692 last->next = cur;
4693 last = cur;
4694 }
4695 SKIP_BLANKS;
4696 } while (RAW == '|');
4697 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004698 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004699 if ((last != NULL) && (last != ret))
4700 xmlFreeEnumeration(last);
4701 return(ret);
4702 }
4703 NEXT;
4704 return(ret);
4705}
4706
4707/**
4708 * xmlParseEnumerationType:
4709 * @ctxt: an XML parser context
4710 *
4711 * parse an Enumeration attribute type.
4712 *
4713 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4714 *
4715 * [ VC: Enumeration ]
4716 * Values of this type must match one of the Nmtoken tokens in
4717 * the declaration
4718 *
4719 * Returns: the enumeration attribute tree built while parsing
4720 */
4721
4722xmlEnumerationPtr
4723xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4724 xmlChar *name;
4725 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4726
4727 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004728 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004729 return(NULL);
4730 }
4731 SHRINK;
4732 do {
4733 NEXT;
4734 SKIP_BLANKS;
4735 name = xmlParseNmtoken(ctxt);
4736 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004737 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 return(ret);
4739 }
4740 cur = xmlCreateEnumeration(name);
4741 xmlFree(name);
4742 if (cur == NULL) return(ret);
4743 if (last == NULL) ret = last = cur;
4744 else {
4745 last->next = cur;
4746 last = cur;
4747 }
4748 SKIP_BLANKS;
4749 } while (RAW == '|');
4750 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004751 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004752 return(ret);
4753 }
4754 NEXT;
4755 return(ret);
4756}
4757
4758/**
4759 * xmlParseEnumeratedType:
4760 * @ctxt: an XML parser context
4761 * @tree: the enumeration tree built while parsing
4762 *
4763 * parse an Enumerated attribute type.
4764 *
4765 * [57] EnumeratedType ::= NotationType | Enumeration
4766 *
4767 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4768 *
4769 *
4770 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4771 */
4772
4773int
4774xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004775 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004776 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4779 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004780 return(0);
4781 }
4782 SKIP_BLANKS;
4783 *tree = xmlParseNotationType(ctxt);
4784 if (*tree == NULL) return(0);
4785 return(XML_ATTRIBUTE_NOTATION);
4786 }
4787 *tree = xmlParseEnumerationType(ctxt);
4788 if (*tree == NULL) return(0);
4789 return(XML_ATTRIBUTE_ENUMERATION);
4790}
4791
4792/**
4793 * xmlParseAttributeType:
4794 * @ctxt: an XML parser context
4795 * @tree: the enumeration tree built while parsing
4796 *
4797 * parse the Attribute list def for an element
4798 *
4799 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4800 *
4801 * [55] StringType ::= 'CDATA'
4802 *
4803 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4804 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4805 *
4806 * Validity constraints for attribute values syntax are checked in
4807 * xmlValidateAttributeValue()
4808 *
4809 * [ VC: ID ]
4810 * Values of type ID must match the Name production. A name must not
4811 * appear more than once in an XML document as a value of this type;
4812 * i.e., ID values must uniquely identify the elements which bear them.
4813 *
4814 * [ VC: One ID per Element Type ]
4815 * No element type may have more than one ID attribute specified.
4816 *
4817 * [ VC: ID Attribute Default ]
4818 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4819 *
4820 * [ VC: IDREF ]
4821 * Values of type IDREF must match the Name production, and values
4822 * of type IDREFS must match Names; each IDREF Name must match the value
4823 * of an ID attribute on some element in the XML document; i.e. IDREF
4824 * values must match the value of some ID attribute.
4825 *
4826 * [ VC: Entity Name ]
4827 * Values of type ENTITY must match the Name production, values
4828 * of type ENTITIES must match Names; each Entity Name must match the
4829 * name of an unparsed entity declared in the DTD.
4830 *
4831 * [ VC: Name Token ]
4832 * Values of type NMTOKEN must match the Nmtoken production; values
4833 * of type NMTOKENS must match Nmtokens.
4834 *
4835 * Returns the attribute type
4836 */
4837int
4838xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4839 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004840 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004841 SKIP(5);
4842 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004843 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 SKIP(6);
4845 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004846 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004847 SKIP(5);
4848 return(XML_ATTRIBUTE_IDREF);
4849 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4850 SKIP(2);
4851 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004852 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004853 SKIP(6);
4854 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004855 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP(8);
4857 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004858 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004859 SKIP(8);
4860 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004861 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004862 SKIP(7);
4863 return(XML_ATTRIBUTE_NMTOKEN);
4864 }
4865 return(xmlParseEnumeratedType(ctxt, tree));
4866}
4867
4868/**
4869 * xmlParseAttributeListDecl:
4870 * @ctxt: an XML parser context
4871 *
4872 * : parse the Attribute list def for an element
4873 *
4874 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4875 *
4876 * [53] AttDef ::= S Name S AttType S DefaultDecl
4877 *
4878 */
4879void
4880xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004881 const xmlChar *elemName;
4882 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004883 xmlEnumerationPtr tree;
4884
Daniel Veillarda07050d2003-10-19 14:46:32 +00004885 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004886 xmlParserInputPtr input = ctxt->input;
4887
4888 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004889 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004890 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004891 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004892 }
4893 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004894 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004895 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4897 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004898 return;
4899 }
4900 SKIP_BLANKS;
4901 GROW;
4902 while (RAW != '>') {
4903 const xmlChar *check = CUR_PTR;
4904 int type;
4905 int def;
4906 xmlChar *defaultValue = NULL;
4907
4908 GROW;
4909 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004910 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004911 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004912 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4913 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004914 break;
4915 }
4916 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004917 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004918 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004919 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004920 break;
4921 }
4922 SKIP_BLANKS;
4923
4924 type = xmlParseAttributeType(ctxt, &tree);
4925 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004926 break;
4927 }
4928
4929 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004930 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004931 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4932 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004933 if (tree != NULL)
4934 xmlFreeEnumeration(tree);
4935 break;
4936 }
4937 SKIP_BLANKS;
4938
4939 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4940 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004941 if (defaultValue != NULL)
4942 xmlFree(defaultValue);
4943 if (tree != NULL)
4944 xmlFreeEnumeration(tree);
4945 break;
4946 }
4947
4948 GROW;
4949 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004950 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004952 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004953 if (defaultValue != NULL)
4954 xmlFree(defaultValue);
4955 if (tree != NULL)
4956 xmlFreeEnumeration(tree);
4957 break;
4958 }
4959 SKIP_BLANKS;
4960 }
4961 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004962 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4963 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004964 if (defaultValue != NULL)
4965 xmlFree(defaultValue);
4966 if (tree != NULL)
4967 xmlFreeEnumeration(tree);
4968 break;
4969 }
4970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->attributeDecl != NULL))
4972 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4973 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004974 else if (tree != NULL)
4975 xmlFreeEnumeration(tree);
4976
4977 if ((ctxt->sax2) && (defaultValue != NULL) &&
4978 (def != XML_ATTRIBUTE_IMPLIED) &&
4979 (def != XML_ATTRIBUTE_REQUIRED)) {
4980 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4981 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004982 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4983 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4984 }
Owen Taylor3473f882001-02-23 17:55:21 +00004985 if (defaultValue != NULL)
4986 xmlFree(defaultValue);
4987 GROW;
4988 }
4989 if (RAW == '>') {
4990 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004991 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4992 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004993 }
4994 NEXT;
4995 }
Owen Taylor3473f882001-02-23 17:55:21 +00004996 }
4997}
4998
4999/**
5000 * xmlParseElementMixedContentDecl:
5001 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005002 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005003 *
5004 * parse the declaration for a Mixed Element content
5005 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5006 *
5007 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5008 * '(' S? '#PCDATA' S? ')'
5009 *
5010 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5011 *
5012 * [ VC: No Duplicate Types ]
5013 * The same name must not appear more than once in a single
5014 * mixed-content declaration.
5015 *
5016 * returns: the list of the xmlElementContentPtr describing the element choices
5017 */
5018xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005019xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005020 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005021 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005024 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005025 SKIP(7);
5026 SKIP_BLANKS;
5027 SHRINK;
5028 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005030 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5031"Element content declaration doesn't start and stop in the same entity\n",
5032 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005033 }
Owen Taylor3473f882001-02-23 17:55:21 +00005034 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005035 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005036 if (RAW == '*') {
5037 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5038 NEXT;
5039 }
5040 return(ret);
5041 }
5042 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005043 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005044 if (ret == NULL) return(NULL);
5045 }
5046 while (RAW == '|') {
5047 NEXT;
5048 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005049 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 if (ret == NULL) return(NULL);
5051 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005052 if (cur != NULL)
5053 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005054 cur = ret;
5055 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005056 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005057 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005058 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005059 if (n->c1 != NULL)
5060 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005061 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005062 if (n != NULL)
5063 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005065 }
5066 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005067 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005068 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005069 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005070 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005071 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005072 return(NULL);
5073 }
5074 SKIP_BLANKS;
5075 GROW;
5076 }
5077 if ((RAW == ')') && (NXT(1) == '*')) {
5078 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005079 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005080 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005081 if (cur->c2 != NULL)
5082 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005083 }
5084 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005085 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005086 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5087"Element content declaration doesn't start and stop in the same entity\n",
5088 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005089 }
Owen Taylor3473f882001-02-23 17:55:21 +00005090 SKIP(2);
5091 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005092 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005093 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005094 return(NULL);
5095 }
5096
5097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005098 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005099 }
5100 return(ret);
5101}
5102
5103/**
5104 * xmlParseElementChildrenContentDecl:
5105 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005106 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005107 *
5108 * parse the declaration for a Mixed Element content
5109 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5110 *
5111 *
5112 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5113 *
5114 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5115 *
5116 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5117 *
5118 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5119 *
5120 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5121 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005122 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005123 * opening or closing parentheses in a choice, seq, or Mixed
5124 * construct is contained in the replacement text for a parameter
5125 * entity, both must be contained in the same replacement text. For
5126 * interoperability, if a parameter-entity reference appears in a
5127 * choice, seq, or Mixed construct, its replacement text should not
5128 * be empty, and neither the first nor last non-blank character of
5129 * the replacement text should be a connector (| or ,).
5130 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005131 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005132 * hierarchy.
5133 */
5134xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005135xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005136 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005137 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 xmlChar type = 0;
5139
5140 SKIP_BLANKS;
5141 GROW;
5142 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005144
Owen Taylor3473f882001-02-23 17:55:21 +00005145 /* Recurse on first child */
5146 NEXT;
5147 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005148 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005149 SKIP_BLANKS;
5150 GROW;
5151 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005152 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005154 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 return(NULL);
5156 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005157 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005158 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005159 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005160 return(NULL);
5161 }
Owen Taylor3473f882001-02-23 17:55:21 +00005162 GROW;
5163 if (RAW == '?') {
5164 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5165 NEXT;
5166 } else if (RAW == '*') {
5167 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5168 NEXT;
5169 } else if (RAW == '+') {
5170 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5171 NEXT;
5172 } else {
5173 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5174 }
Owen Taylor3473f882001-02-23 17:55:21 +00005175 GROW;
5176 }
5177 SKIP_BLANKS;
5178 SHRINK;
5179 while (RAW != ')') {
5180 /*
5181 * Each loop we parse one separator and one element.
5182 */
5183 if (RAW == ',') {
5184 if (type == 0) type = CUR;
5185
5186 /*
5187 * Detect "Name | Name , Name" error
5188 */
5189 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005190 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005191 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005193 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005194 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005195 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005196 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 return(NULL);
5198 }
5199 NEXT;
5200
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005201 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005203 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005204 xmlFreeDocElementContent(ctxt->myDoc, last);
5205 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005206 return(NULL);
5207 }
5208 if (last == NULL) {
5209 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005210 if (ret != NULL)
5211 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005212 ret = cur = op;
5213 } else {
5214 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005215 if (op != NULL)
5216 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005217 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005218 if (last != NULL)
5219 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005220 cur =op;
5221 last = NULL;
5222 }
5223 } else if (RAW == '|') {
5224 if (type == 0) type = CUR;
5225
5226 /*
5227 * Detect "Name , Name | Name" error
5228 */
5229 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005230 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005231 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005232 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005233 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005234 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005235 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 return(NULL);
5238 }
5239 NEXT;
5240
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005241 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005242 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005243 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005246 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 return(NULL);
5248 }
5249 if (last == NULL) {
5250 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005251 if (ret != NULL)
5252 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005253 ret = cur = op;
5254 } else {
5255 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005256 if (op != NULL)
5257 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005258 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005259 if (last != NULL)
5260 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 cur =op;
5262 last = NULL;
5263 }
5264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005265 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005267 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 return(NULL);
5269 }
5270 GROW;
5271 SKIP_BLANKS;
5272 GROW;
5273 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005274 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005275 /* Recurse on second child */
5276 NEXT;
5277 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005278 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005279 SKIP_BLANKS;
5280 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005281 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005283 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005285 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 return(NULL);
5287 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005288 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005289 if (RAW == '?') {
5290 last->ocur = XML_ELEMENT_CONTENT_OPT;
5291 NEXT;
5292 } else if (RAW == '*') {
5293 last->ocur = XML_ELEMENT_CONTENT_MULT;
5294 NEXT;
5295 } else if (RAW == '+') {
5296 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5297 NEXT;
5298 } else {
5299 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5300 }
5301 }
5302 SKIP_BLANKS;
5303 GROW;
5304 }
5305 if ((cur != NULL) && (last != NULL)) {
5306 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005307 if (last != NULL)
5308 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005309 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005310 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005311 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5312"Element content declaration doesn't start and stop in the same entity\n",
5313 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005314 }
Owen Taylor3473f882001-02-23 17:55:21 +00005315 NEXT;
5316 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005317 if (ret != NULL) {
5318 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5319 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5320 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5321 else
5322 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5323 }
Owen Taylor3473f882001-02-23 17:55:21 +00005324 NEXT;
5325 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005326 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005327 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005328 cur = ret;
5329 /*
5330 * Some normalization:
5331 * (a | b* | c?)* == (a | b | c)*
5332 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005333 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005334 if ((cur->c1 != NULL) &&
5335 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5336 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5337 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5338 if ((cur->c2 != NULL) &&
5339 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5340 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5341 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5342 cur = cur->c2;
5343 }
5344 }
Owen Taylor3473f882001-02-23 17:55:21 +00005345 NEXT;
5346 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005347 if (ret != NULL) {
5348 int found = 0;
5349
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005350 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5351 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5352 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005353 else
5354 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005355 /*
5356 * Some normalization:
5357 * (a | b*)+ == (a | b)*
5358 * (a | b?)+ == (a | b)*
5359 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005360 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005361 if ((cur->c1 != NULL) &&
5362 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5363 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5364 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5365 found = 1;
5366 }
5367 if ((cur->c2 != NULL) &&
5368 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5369 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5370 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5371 found = 1;
5372 }
5373 cur = cur->c2;
5374 }
5375 if (found)
5376 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5377 }
Owen Taylor3473f882001-02-23 17:55:21 +00005378 NEXT;
5379 }
5380 return(ret);
5381}
5382
5383/**
5384 * xmlParseElementContentDecl:
5385 * @ctxt: an XML parser context
5386 * @name: the name of the element being defined.
5387 * @result: the Element Content pointer will be stored here if any
5388 *
5389 * parse the declaration for an Element content either Mixed or Children,
5390 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5391 *
5392 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5393 *
5394 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5395 */
5396
5397int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005398xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005399 xmlElementContentPtr *result) {
5400
5401 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005402 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005403 int res;
5404
5405 *result = NULL;
5406
5407 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005408 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005409 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005410 return(-1);
5411 }
5412 NEXT;
5413 GROW;
5414 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005415 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005416 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 res = XML_ELEMENT_TYPE_MIXED;
5418 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 res = XML_ELEMENT_TYPE_ELEMENT;
5421 }
Owen Taylor3473f882001-02-23 17:55:21 +00005422 SKIP_BLANKS;
5423 *result = tree;
5424 return(res);
5425}
5426
5427/**
5428 * xmlParseElementDecl:
5429 * @ctxt: an XML parser context
5430 *
5431 * parse an Element declaration.
5432 *
5433 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5434 *
5435 * [ VC: Unique Element Type Declaration ]
5436 * No element type may be declared more than once
5437 *
5438 * Returns the type of the element, or -1 in case of error
5439 */
5440int
5441xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005442 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005443 int ret = -1;
5444 xmlElementContentPtr content = NULL;
5445
Daniel Veillard4c778d82005-01-23 17:37:44 +00005446 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005447 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005448 xmlParserInputPtr input = ctxt->input;
5449
5450 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005451 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005454 }
5455 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005456 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005457 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005458 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5459 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005460 return(-1);
5461 }
5462 while ((RAW == 0) && (ctxt->inputNr > 1))
5463 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005464 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5466 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
5468 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005469 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005470 SKIP(5);
5471 /*
5472 * Element must always be empty.
5473 */
5474 ret = XML_ELEMENT_TYPE_EMPTY;
5475 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5476 (NXT(2) == 'Y')) {
5477 SKIP(3);
5478 /*
5479 * Element is a generic container.
5480 */
5481 ret = XML_ELEMENT_TYPE_ANY;
5482 } else if (RAW == '(') {
5483 ret = xmlParseElementContentDecl(ctxt, name, &content);
5484 } else {
5485 /*
5486 * [ WFC: PEs in Internal Subset ] error handling.
5487 */
5488 if ((RAW == '%') && (ctxt->external == 0) &&
5489 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005490 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005491 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005492 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005494 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5495 }
Owen Taylor3473f882001-02-23 17:55:21 +00005496 return(-1);
5497 }
5498
5499 SKIP_BLANKS;
5500 /*
5501 * Pop-up of finished entities.
5502 */
5503 while ((RAW == 0) && (ctxt->inputNr > 1))
5504 xmlPopInput(ctxt);
5505 SKIP_BLANKS;
5506
5507 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005508 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005509 if (content != NULL) {
5510 xmlFreeDocElementContent(ctxt->myDoc, content);
5511 }
Owen Taylor3473f882001-02-23 17:55:21 +00005512 } else {
5513 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005514 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5515 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005516 }
5517
5518 NEXT;
5519 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005520 (ctxt->sax->elementDecl != NULL)) {
5521 if (content != NULL)
5522 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005523 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5524 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005525 if ((content != NULL) && (content->parent == NULL)) {
5526 /*
5527 * this is a trick: if xmlAddElementDecl is called,
5528 * instead of copying the full tree it is plugged directly
5529 * if called from the parser. Avoid duplicating the
5530 * interfaces or change the API/ABI
5531 */
5532 xmlFreeDocElementContent(ctxt->myDoc, content);
5533 }
5534 } else if (content != NULL) {
5535 xmlFreeDocElementContent(ctxt->myDoc, content);
5536 }
Owen Taylor3473f882001-02-23 17:55:21 +00005537 }
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 return(ret);
5540}
5541
5542/**
Owen Taylor3473f882001-02-23 17:55:21 +00005543 * xmlParseConditionalSections
5544 * @ctxt: an XML parser context
5545 *
5546 * [61] conditionalSect ::= includeSect | ignoreSect
5547 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5548 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5549 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5550 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5551 */
5552
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005553static void
Owen Taylor3473f882001-02-23 17:55:21 +00005554xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5555 SKIP(3);
5556 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005557 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005558 SKIP(7);
5559 SKIP_BLANKS;
5560 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005561 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005562 } else {
5563 NEXT;
5564 }
5565 if (xmlParserDebugEntities) {
5566 if ((ctxt->input != NULL) && (ctxt->input->filename))
5567 xmlGenericError(xmlGenericErrorContext,
5568 "%s(%d): ", ctxt->input->filename,
5569 ctxt->input->line);
5570 xmlGenericError(xmlGenericErrorContext,
5571 "Entering INCLUDE Conditional Section\n");
5572 }
5573
5574 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5575 (NXT(2) != '>'))) {
5576 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005577 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005578
5579 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5580 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005581 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005582 NEXT;
5583 } else if (RAW == '%') {
5584 xmlParsePEReference(ctxt);
5585 } else
5586 xmlParseMarkupDecl(ctxt);
5587
5588 /*
5589 * Pop-up of finished entities.
5590 */
5591 while ((RAW == 0) && (ctxt->inputNr > 1))
5592 xmlPopInput(ctxt);
5593
Daniel Veillardfdc91562002-07-01 21:52:03 +00005594 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005595 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005596 break;
5597 }
5598 }
5599 if (xmlParserDebugEntities) {
5600 if ((ctxt->input != NULL) && (ctxt->input->filename))
5601 xmlGenericError(xmlGenericErrorContext,
5602 "%s(%d): ", ctxt->input->filename,
5603 ctxt->input->line);
5604 xmlGenericError(xmlGenericErrorContext,
5605 "Leaving INCLUDE Conditional Section\n");
5606 }
5607
Daniel Veillarda07050d2003-10-19 14:46:32 +00005608 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005609 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005610 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005611 int depth = 0;
5612
5613 SKIP(6);
5614 SKIP_BLANKS;
5615 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005616 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005617 } else {
5618 NEXT;
5619 }
5620 if (xmlParserDebugEntities) {
5621 if ((ctxt->input != NULL) && (ctxt->input->filename))
5622 xmlGenericError(xmlGenericErrorContext,
5623 "%s(%d): ", ctxt->input->filename,
5624 ctxt->input->line);
5625 xmlGenericError(xmlGenericErrorContext,
5626 "Entering IGNORE Conditional Section\n");
5627 }
5628
5629 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005630 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005631 * But disable SAX event generating DTD building in the meantime
5632 */
5633 state = ctxt->disableSAX;
5634 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005636 ctxt->instate = XML_PARSER_IGNORE;
5637
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005638 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005639 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5640 depth++;
5641 SKIP(3);
5642 continue;
5643 }
5644 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5645 if (--depth >= 0) SKIP(3);
5646 continue;
5647 }
5648 NEXT;
5649 continue;
5650 }
5651
5652 ctxt->disableSAX = state;
5653 ctxt->instate = instate;
5654
5655 if (xmlParserDebugEntities) {
5656 if ((ctxt->input != NULL) && (ctxt->input->filename))
5657 xmlGenericError(xmlGenericErrorContext,
5658 "%s(%d): ", ctxt->input->filename,
5659 ctxt->input->line);
5660 xmlGenericError(xmlGenericErrorContext,
5661 "Leaving IGNORE Conditional Section\n");
5662 }
5663
5664 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005665 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005666 }
5667
5668 if (RAW == 0)
5669 SHRINK;
5670
5671 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005672 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 } else {
5674 SKIP(3);
5675 }
5676}
5677
5678/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005679 * xmlParseMarkupDecl:
5680 * @ctxt: an XML parser context
5681 *
5682 * parse Markup declarations
5683 *
5684 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5685 * NotationDecl | PI | Comment
5686 *
5687 * [ VC: Proper Declaration/PE Nesting ]
5688 * Parameter-entity replacement text must be properly nested with
5689 * markup declarations. That is to say, if either the first character
5690 * or the last character of a markup declaration (markupdecl above) is
5691 * contained in the replacement text for a parameter-entity reference,
5692 * both must be contained in the same replacement text.
5693 *
5694 * [ WFC: PEs in Internal Subset ]
5695 * In the internal DTD subset, parameter-entity references can occur
5696 * only where markup declarations can occur, not within markup declarations.
5697 * (This does not apply to references that occur in external parameter
5698 * entities or to the external subset.)
5699 */
5700void
5701xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5702 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005703 if (CUR == '<') {
5704 if (NXT(1) == '!') {
5705 switch (NXT(2)) {
5706 case 'E':
5707 if (NXT(3) == 'L')
5708 xmlParseElementDecl(ctxt);
5709 else if (NXT(3) == 'N')
5710 xmlParseEntityDecl(ctxt);
5711 break;
5712 case 'A':
5713 xmlParseAttributeListDecl(ctxt);
5714 break;
5715 case 'N':
5716 xmlParseNotationDecl(ctxt);
5717 break;
5718 case '-':
5719 xmlParseComment(ctxt);
5720 break;
5721 default:
5722 /* there is an error but it will be detected later */
5723 break;
5724 }
5725 } else if (NXT(1) == '?') {
5726 xmlParsePI(ctxt);
5727 }
5728 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005729 /*
5730 * This is only for internal subset. On external entities,
5731 * the replacement is done before parsing stage
5732 */
5733 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5734 xmlParsePEReference(ctxt);
5735
5736 /*
5737 * Conditional sections are allowed from entities included
5738 * by PE References in the internal subset.
5739 */
5740 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5741 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5742 xmlParseConditionalSections(ctxt);
5743 }
5744 }
5745
5746 ctxt->instate = XML_PARSER_DTD;
5747}
5748
5749/**
5750 * xmlParseTextDecl:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse an XML declaration header for external entities
5754 *
5755 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5756 *
5757 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5758 */
5759
5760void
5761xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5762 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005763 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005764
5765 /*
5766 * We know that '<?xml' is here.
5767 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005768 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005769 SKIP(5);
5770 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005771 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005772 return;
5773 }
5774
William M. Brack76e95df2003-10-18 16:20:14 +00005775 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005778 }
5779 SKIP_BLANKS;
5780
5781 /*
5782 * We may have the VersionInfo here.
5783 */
5784 version = xmlParseVersionInfo(ctxt);
5785 if (version == NULL)
5786 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005787 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005788 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005789 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5790 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005791 }
5792 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005793 ctxt->input->version = version;
5794
5795 /*
5796 * We must have the encoding declaration
5797 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005798 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005799 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5800 /*
5801 * The XML REC instructs us to stop parsing right here
5802 */
5803 return;
5804 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005805 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5806 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5807 "Missing encoding in text declaration\n");
5808 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005809
5810 SKIP_BLANKS;
5811 if ((RAW == '?') && (NXT(1) == '>')) {
5812 SKIP(2);
5813 } else if (RAW == '>') {
5814 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005815 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005816 NEXT;
5817 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005818 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005819 MOVETO_ENDTAG(CUR_PTR);
5820 NEXT;
5821 }
5822}
5823
5824/**
Owen Taylor3473f882001-02-23 17:55:21 +00005825 * xmlParseExternalSubset:
5826 * @ctxt: an XML parser context
5827 * @ExternalID: the external identifier
5828 * @SystemID: the system identifier (or URL)
5829 *
5830 * parse Markup declarations from an external subset
5831 *
5832 * [30] extSubset ::= textDecl? extSubsetDecl
5833 *
5834 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5835 */
5836void
5837xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5838 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005839 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005840 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005841 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005842 xmlParseTextDecl(ctxt);
5843 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5844 /*
5845 * The XML REC instructs us to stop parsing right here
5846 */
5847 ctxt->instate = XML_PARSER_EOF;
5848 return;
5849 }
5850 }
5851 if (ctxt->myDoc == NULL) {
5852 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5853 }
5854 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5855 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5856
5857 ctxt->instate = XML_PARSER_DTD;
5858 ctxt->external = 1;
5859 while (((RAW == '<') && (NXT(1) == '?')) ||
5860 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005861 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005862 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005863 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005864
5865 GROW;
5866 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5867 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005868 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005869 NEXT;
5870 } else if (RAW == '%') {
5871 xmlParsePEReference(ctxt);
5872 } else
5873 xmlParseMarkupDecl(ctxt);
5874
5875 /*
5876 * Pop-up of finished entities.
5877 */
5878 while ((RAW == 0) && (ctxt->inputNr > 1))
5879 xmlPopInput(ctxt);
5880
Daniel Veillardfdc91562002-07-01 21:52:03 +00005881 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005882 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005883 break;
5884 }
5885 }
5886
5887 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005888 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 }
5890
5891}
5892
5893/**
5894 * xmlParseReference:
5895 * @ctxt: an XML parser context
5896 *
5897 * parse and handle entity references in content, depending on the SAX
5898 * interface, this may end-up in a call to character() if this is a
5899 * CharRef, a predefined entity, if there is no reference() callback.
5900 * or if the parser was asked to switch to that mode.
5901 *
5902 * [67] Reference ::= EntityRef | CharRef
5903 */
5904void
5905xmlParseReference(xmlParserCtxtPtr ctxt) {
5906 xmlEntityPtr ent;
5907 xmlChar *val;
5908 if (RAW != '&') return;
5909
5910 if (NXT(1) == '#') {
5911 int i = 0;
5912 xmlChar out[10];
5913 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005914 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005915
5916 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5917 /*
5918 * So we are using non-UTF-8 buffers
5919 * Check that the char fit on 8bits, if not
5920 * generate a CharRef.
5921 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005922 if (value <= 0xFF) {
5923 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005924 out[1] = 0;
5925 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5926 (!ctxt->disableSAX))
5927 ctxt->sax->characters(ctxt->userData, out, 1);
5928 } else {
5929 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005930 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005931 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005932 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5934 (!ctxt->disableSAX))
5935 ctxt->sax->reference(ctxt->userData, out);
5936 }
5937 } else {
5938 /*
5939 * Just encode the value in UTF-8
5940 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005941 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 out[i] = 0;
5943 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5944 (!ctxt->disableSAX))
5945 ctxt->sax->characters(ctxt->userData, out, i);
5946 }
5947 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005948 int was_checked;
5949
Owen Taylor3473f882001-02-23 17:55:21 +00005950 ent = xmlParseEntityRef(ctxt);
5951 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005952 if (!ctxt->wellFormed)
5953 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005954 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005955 if ((ent->name != NULL) &&
5956 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5957 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005958 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005959
5960
5961 /*
5962 * The first reference to the entity trigger a parsing phase
5963 * where the ent->children is filled with the result from
5964 * the parsing.
5965 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005966 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005967 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005968
Owen Taylor3473f882001-02-23 17:55:21 +00005969 value = ent->content;
5970
5971 /*
5972 * Check that this entity is well formed
5973 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005974 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005975 (value[1] == 0) && (value[0] == '<') &&
5976 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5977 /*
5978 * DONE: get definite answer on this !!!
5979 * Lots of entity decls are used to declare a single
5980 * char
5981 * <!ENTITY lt "<">
5982 * Which seems to be valid since
5983 * 2.4: The ampersand character (&) and the left angle
5984 * bracket (<) may appear in their literal form only
5985 * when used ... They are also legal within the literal
5986 * entity value of an internal entity declaration;i
5987 * see "4.3.2 Well-Formed Parsed Entities".
5988 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5989 * Looking at the OASIS test suite and James Clark
5990 * tests, this is broken. However the XML REC uses
5991 * it. Is the XML REC not well-formed ????
5992 * This is a hack to avoid this problem
5993 *
5994 * ANSWER: since lt gt amp .. are already defined,
5995 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005996 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005997 * is lousy but acceptable.
5998 */
5999 list = xmlNewDocText(ctxt->myDoc, value);
6000 if (list != NULL) {
6001 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6002 (ent->children == NULL)) {
6003 ent->children = list;
6004 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006005 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006006 list->parent = (xmlNodePtr) ent;
6007 } else {
6008 xmlFreeNodeList(list);
6009 }
6010 } else if (list != NULL) {
6011 xmlFreeNodeList(list);
6012 }
6013 } else {
6014 /*
6015 * 4.3.2: An internal general parsed entity is well-formed
6016 * if its replacement text matches the production labeled
6017 * content.
6018 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006019
6020 void *user_data;
6021 /*
6022 * This is a bit hackish but this seems the best
6023 * way to make sure both SAX and DOM entity support
6024 * behaves okay.
6025 */
6026 if (ctxt->userData == ctxt)
6027 user_data = NULL;
6028 else
6029 user_data = ctxt->userData;
6030
Owen Taylor3473f882001-02-23 17:55:21 +00006031 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6032 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006033 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6034 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006035 ctxt->depth--;
6036 } else if (ent->etype ==
6037 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6038 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006039 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006040 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006041 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006042 ctxt->depth--;
6043 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006044 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006045 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6046 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006047 }
6048 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006049 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006050 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006051 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006052 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6053 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006054 (ent->children == NULL)) {
6055 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006056 if (ctxt->replaceEntities) {
6057 /*
6058 * Prune it directly in the generated document
6059 * except for single text nodes.
6060 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006061 if (((list->type == XML_TEXT_NODE) &&
6062 (list->next == NULL)) ||
6063 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006064 list->parent = (xmlNodePtr) ent;
6065 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006066 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006067 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006068 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006069 while (list != NULL) {
6070 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006071 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006072 if (list->next == NULL)
6073 ent->last = list;
6074 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006075 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006076 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006077#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006078 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6079 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006080#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006081 }
6082 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006083 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006084 while (list != NULL) {
6085 list->parent = (xmlNodePtr) ent;
6086 if (list->next == NULL)
6087 ent->last = list;
6088 list = list->next;
6089 }
Owen Taylor3473f882001-02-23 17:55:21 +00006090 }
6091 } else {
6092 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006093 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006095 } else if ((ret != XML_ERR_OK) &&
6096 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006097 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 } else if (list != NULL) {
6099 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006100 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006101 }
6102 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006103 ent->checked = 1;
6104 }
6105
6106 if (ent->children == NULL) {
6107 /*
6108 * Probably running in SAX mode and the callbacks don't
6109 * build the entity content. So unless we already went
6110 * though parsing for first checking go though the entity
6111 * content to generate callbacks associated to the entity
6112 */
6113 if (was_checked == 1) {
6114 void *user_data;
6115 /*
6116 * This is a bit hackish but this seems the best
6117 * way to make sure both SAX and DOM entity support
6118 * behaves okay.
6119 */
6120 if (ctxt->userData == ctxt)
6121 user_data = NULL;
6122 else
6123 user_data = ctxt->userData;
6124
6125 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6126 ctxt->depth++;
6127 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6128 ent->content, user_data, NULL);
6129 ctxt->depth--;
6130 } else if (ent->etype ==
6131 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6132 ctxt->depth++;
6133 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6134 ctxt->sax, user_data, ctxt->depth,
6135 ent->URI, ent->ExternalID, NULL);
6136 ctxt->depth--;
6137 } else {
6138 ret = XML_ERR_ENTITY_PE_INTERNAL;
6139 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6140 "invalid entity type found\n", NULL);
6141 }
6142 if (ret == XML_ERR_ENTITY_LOOP) {
6143 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6144 return;
6145 }
6146 }
6147 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6148 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6149 /*
6150 * Entity reference callback comes second, it's somewhat
6151 * superfluous but a compatibility to historical behaviour
6152 */
6153 ctxt->sax->reference(ctxt->userData, ent->name);
6154 }
6155 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006156 }
6157 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006158 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006159 /*
6160 * Create a node.
6161 */
6162 ctxt->sax->reference(ctxt->userData, ent->name);
6163 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006164 }
6165 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006166 /*
6167 * There is a problem on the handling of _private for entities
6168 * (bug 155816): Should we copy the content of the field from
6169 * the entity (possibly overwriting some value set by the user
6170 * when a copy is created), should we leave it alone, or should
6171 * we try to take care of different situations? The problem
6172 * is exacerbated by the usage of this field by the xmlReader.
6173 * To fix this bug, we look at _private on the created node
6174 * and, if it's NULL, we copy in whatever was in the entity.
6175 * If it's not NULL we leave it alone. This is somewhat of a
6176 * hack - maybe we should have further tests to determine
6177 * what to do.
6178 */
Owen Taylor3473f882001-02-23 17:55:21 +00006179 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6180 /*
6181 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006182 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006183 * In the first occurrence list contains the replacement.
6184 * progressive == 2 means we are operating on the Reader
6185 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006186 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006187 if (((list == NULL) && (ent->owner == 0)) ||
6188 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006189 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006190
6191 /*
6192 * when operating on a reader, the entities definitions
6193 * are always owning the entities subtree.
6194 if (ctxt->parseMode == XML_PARSE_READER)
6195 ent->owner = 1;
6196 */
6197
Daniel Veillard62f313b2001-07-04 19:49:14 +00006198 cur = ent->children;
6199 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006200 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006201 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006202 if (nw->_private == NULL)
6203 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006204 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006205 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006206 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006207 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006208 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006209 if (cur == ent->last) {
6210 /*
6211 * needed to detect some strange empty
6212 * node cases in the reader tests
6213 */
6214 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006215 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006216 (nw->type == XML_ELEMENT_NODE) &&
6217 (nw->children == NULL))
6218 nw->extra = 1;
6219
Daniel Veillard62f313b2001-07-04 19:49:14 +00006220 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006221 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006222 cur = cur->next;
6223 }
Daniel Veillard81273902003-09-30 00:43:48 +00006224#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006225 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006226 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006227#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006228 } else if (list == NULL) {
6229 xmlNodePtr nw = NULL, cur, next, last,
6230 firstChild = NULL;
6231 /*
6232 * Copy the entity child list and make it the new
6233 * entity child list. The goal is to make sure any
6234 * ID or REF referenced will be the one from the
6235 * document content and not the entity copy.
6236 */
6237 cur = ent->children;
6238 ent->children = NULL;
6239 last = ent->last;
6240 ent->last = NULL;
6241 while (cur != NULL) {
6242 next = cur->next;
6243 cur->next = NULL;
6244 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006245 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006246 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006247 if (nw->_private == NULL)
6248 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006249 if (firstChild == NULL){
6250 firstChild = cur;
6251 }
6252 xmlAddChild((xmlNodePtr) ent, nw);
6253 xmlAddChild(ctxt->node, cur);
6254 }
6255 if (cur == last)
6256 break;
6257 cur = next;
6258 }
6259 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006260#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006261 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6262 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006263#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006264 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006265 const xmlChar *nbktext;
6266
Daniel Veillard62f313b2001-07-04 19:49:14 +00006267 /*
6268 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006269 * node with a possible previous text one which
6270 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006271 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006272 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6273 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006274 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006275 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 if ((ent->last != ent->children) &&
6277 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006278 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006279 xmlAddChildList(ctxt->node, ent->children);
6280 }
6281
Owen Taylor3473f882001-02-23 17:55:21 +00006282 /*
6283 * This is to avoid a nasty side effect, see
6284 * characters() in SAX.c
6285 */
6286 ctxt->nodemem = 0;
6287 ctxt->nodelen = 0;
6288 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006289 }
6290 }
6291 } else {
6292 val = ent->content;
6293 if (val == NULL) return;
6294 /*
6295 * inline the entity.
6296 */
6297 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6298 (!ctxt->disableSAX))
6299 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6300 }
6301 }
6302}
6303
6304/**
6305 * xmlParseEntityRef:
6306 * @ctxt: an XML parser context
6307 *
6308 * parse ENTITY references declarations
6309 *
6310 * [68] EntityRef ::= '&' Name ';'
6311 *
6312 * [ WFC: Entity Declared ]
6313 * In a document without any DTD, a document with only an internal DTD
6314 * subset which contains no parameter entity references, or a document
6315 * with "standalone='yes'", the Name given in the entity reference
6316 * must match that in an entity declaration, except that well-formed
6317 * documents need not declare any of the following entities: amp, lt,
6318 * gt, apos, quot. The declaration of a parameter entity must precede
6319 * any reference to it. Similarly, the declaration of a general entity
6320 * must precede any reference to it which appears in a default value in an
6321 * attribute-list declaration. Note that if entities are declared in the
6322 * external subset or in external parameter entities, a non-validating
6323 * processor is not obligated to read and process their declarations;
6324 * for such documents, the rule that an entity must be declared is a
6325 * well-formedness constraint only if standalone='yes'.
6326 *
6327 * [ WFC: Parsed Entity ]
6328 * An entity reference must not contain the name of an unparsed entity
6329 *
6330 * Returns the xmlEntityPtr if found, or NULL otherwise.
6331 */
6332xmlEntityPtr
6333xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006334 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006335 xmlEntityPtr ent = NULL;
6336
6337 GROW;
6338
6339 if (RAW == '&') {
6340 NEXT;
6341 name = xmlParseName(ctxt);
6342 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006343 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6344 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006345 } else {
6346 if (RAW == ';') {
6347 NEXT;
6348 /*
6349 * Ask first SAX for entity resolution, otherwise try the
6350 * predefined set.
6351 */
6352 if (ctxt->sax != NULL) {
6353 if (ctxt->sax->getEntity != NULL)
6354 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006355 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006356 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006357 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6358 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006359 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006360 }
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 /*
6363 * [ WFC: Entity Declared ]
6364 * In a document without any DTD, a document with only an
6365 * internal DTD subset which contains no parameter entity
6366 * references, or a document with "standalone='yes'", the
6367 * Name given in the entity reference must match that in an
6368 * entity declaration, except that well-formed documents
6369 * need not declare any of the following entities: amp, lt,
6370 * gt, apos, quot.
6371 * The declaration of a parameter entity must precede any
6372 * reference to it.
6373 * Similarly, the declaration of a general entity must
6374 * precede any reference to it which appears in a default
6375 * value in an attribute-list declaration. Note that if
6376 * entities are declared in the external subset or in
6377 * external parameter entities, a non-validating processor
6378 * is not obligated to read and process their declarations;
6379 * for such documents, the rule that an entity must be
6380 * declared is a well-formedness constraint only if
6381 * standalone='yes'.
6382 */
6383 if (ent == NULL) {
6384 if ((ctxt->standalone == 1) ||
6385 ((ctxt->hasExternalSubset == 0) &&
6386 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006387 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006388 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006389 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006390 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006391 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006392 if ((ctxt->inSubset == 0) &&
6393 (ctxt->sax != NULL) &&
6394 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006395 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006396 }
Owen Taylor3473f882001-02-23 17:55:21 +00006397 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006398 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006399 }
6400
6401 /*
6402 * [ WFC: Parsed Entity ]
6403 * An entity reference must not contain the name of an
6404 * unparsed entity
6405 */
6406 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006407 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006408 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006409 }
6410
6411 /*
6412 * [ WFC: No External Entity References ]
6413 * Attribute values cannot contain direct or indirect
6414 * entity references to external entities.
6415 */
6416 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6417 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006418 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6419 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
6421 /*
6422 * [ WFC: No < in Attribute Values ]
6423 * The replacement text of any entity referred to directly or
6424 * indirectly in an attribute value (other than "&lt;") must
6425 * not contain a <.
6426 */
6427 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6428 (ent != NULL) &&
6429 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6430 (ent->content != NULL) &&
6431 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006432 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006433 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006434 }
6435
6436 /*
6437 * Internal check, no parameter entities here ...
6438 */
6439 else {
6440 switch (ent->etype) {
6441 case XML_INTERNAL_PARAMETER_ENTITY:
6442 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006443 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6444 "Attempt to reference the parameter entity '%s'\n",
6445 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006446 break;
6447 default:
6448 break;
6449 }
6450 }
6451
6452 /*
6453 * [ WFC: No Recursion ]
6454 * A parsed entity must not contain a recursive reference
6455 * to itself, either directly or indirectly.
6456 * Done somewhere else
6457 */
6458
6459 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006460 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006461 }
Owen Taylor3473f882001-02-23 17:55:21 +00006462 }
6463 }
6464 return(ent);
6465}
6466
6467/**
6468 * xmlParseStringEntityRef:
6469 * @ctxt: an XML parser context
6470 * @str: a pointer to an index in the string
6471 *
6472 * parse ENTITY references declarations, but this version parses it from
6473 * a string value.
6474 *
6475 * [68] EntityRef ::= '&' Name ';'
6476 *
6477 * [ WFC: Entity Declared ]
6478 * In a document without any DTD, a document with only an internal DTD
6479 * subset which contains no parameter entity references, or a document
6480 * with "standalone='yes'", the Name given in the entity reference
6481 * must match that in an entity declaration, except that well-formed
6482 * documents need not declare any of the following entities: amp, lt,
6483 * gt, apos, quot. The declaration of a parameter entity must precede
6484 * any reference to it. Similarly, the declaration of a general entity
6485 * must precede any reference to it which appears in a default value in an
6486 * attribute-list declaration. Note that if entities are declared in the
6487 * external subset or in external parameter entities, a non-validating
6488 * processor is not obligated to read and process their declarations;
6489 * for such documents, the rule that an entity must be declared is a
6490 * well-formedness constraint only if standalone='yes'.
6491 *
6492 * [ WFC: Parsed Entity ]
6493 * An entity reference must not contain the name of an unparsed entity
6494 *
6495 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6496 * is updated to the current location in the string.
6497 */
6498xmlEntityPtr
6499xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6500 xmlChar *name;
6501 const xmlChar *ptr;
6502 xmlChar cur;
6503 xmlEntityPtr ent = NULL;
6504
6505 if ((str == NULL) || (*str == NULL))
6506 return(NULL);
6507 ptr = *str;
6508 cur = *ptr;
6509 if (cur == '&') {
6510 ptr++;
6511 cur = *ptr;
6512 name = xmlParseStringName(ctxt, &ptr);
6513 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006514 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6515 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006516 } else {
6517 if (*ptr == ';') {
6518 ptr++;
6519 /*
6520 * Ask first SAX for entity resolution, otherwise try the
6521 * predefined set.
6522 */
6523 if (ctxt->sax != NULL) {
6524 if (ctxt->sax->getEntity != NULL)
6525 ent = ctxt->sax->getEntity(ctxt->userData, name);
6526 if (ent == NULL)
6527 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006528 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006529 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006530 }
Owen Taylor3473f882001-02-23 17:55:21 +00006531 }
6532 /*
6533 * [ WFC: Entity Declared ]
6534 * In a document without any DTD, a document with only an
6535 * internal DTD subset which contains no parameter entity
6536 * references, or a document with "standalone='yes'", the
6537 * Name given in the entity reference must match that in an
6538 * entity declaration, except that well-formed documents
6539 * need not declare any of the following entities: amp, lt,
6540 * gt, apos, quot.
6541 * The declaration of a parameter entity must precede any
6542 * reference to it.
6543 * Similarly, the declaration of a general entity must
6544 * precede any reference to it which appears in a default
6545 * value in an attribute-list declaration. Note that if
6546 * entities are declared in the external subset or in
6547 * external parameter entities, a non-validating processor
6548 * is not obligated to read and process their declarations;
6549 * for such documents, the rule that an entity must be
6550 * declared is a well-formedness constraint only if
6551 * standalone='yes'.
6552 */
6553 if (ent == NULL) {
6554 if ((ctxt->standalone == 1) ||
6555 ((ctxt->hasExternalSubset == 0) &&
6556 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006557 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006558 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006559 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006560 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006561 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006563 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006564 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
6566
6567 /*
6568 * [ WFC: Parsed Entity ]
6569 * An entity reference must not contain the name of an
6570 * unparsed entity
6571 */
6572 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006574 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006575 }
6576
6577 /*
6578 * [ WFC: No External Entity References ]
6579 * Attribute values cannot contain direct or indirect
6580 * entity references to external entities.
6581 */
6582 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6583 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006584 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006585 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006586 }
6587 /*
6588 * [ WFC: No < in Attribute Values ]
6589 * The replacement text of any entity referred to directly or
6590 * indirectly in an attribute value (other than "&lt;") must
6591 * not contain a <.
6592 */
6593 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6594 (ent != NULL) &&
6595 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6596 (ent->content != NULL) &&
6597 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006598 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6599 "'<' in entity '%s' is not allowed in attributes values\n",
6600 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006601 }
6602
6603 /*
6604 * Internal check, no parameter entities here ...
6605 */
6606 else {
6607 switch (ent->etype) {
6608 case XML_INTERNAL_PARAMETER_ENTITY:
6609 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006610 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6611 "Attempt to reference the parameter entity '%s'\n",
6612 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006613 break;
6614 default:
6615 break;
6616 }
6617 }
6618
6619 /*
6620 * [ WFC: No Recursion ]
6621 * A parsed entity must not contain a recursive reference
6622 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006623 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006624 */
6625
6626 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006627 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006628 }
6629 xmlFree(name);
6630 }
6631 }
6632 *str = ptr;
6633 return(ent);
6634}
6635
6636/**
6637 * xmlParsePEReference:
6638 * @ctxt: an XML parser context
6639 *
6640 * parse PEReference declarations
6641 * The entity content is handled directly by pushing it's content as
6642 * a new input stream.
6643 *
6644 * [69] PEReference ::= '%' Name ';'
6645 *
6646 * [ WFC: No Recursion ]
6647 * A parsed entity must not contain a recursive
6648 * reference to itself, either directly or indirectly.
6649 *
6650 * [ WFC: Entity Declared ]
6651 * In a document without any DTD, a document with only an internal DTD
6652 * subset which contains no parameter entity references, or a document
6653 * with "standalone='yes'", ... ... The declaration of a parameter
6654 * entity must precede any reference to it...
6655 *
6656 * [ VC: Entity Declared ]
6657 * In a document with an external subset or external parameter entities
6658 * with "standalone='no'", ... ... The declaration of a parameter entity
6659 * must precede any reference to it...
6660 *
6661 * [ WFC: In DTD ]
6662 * Parameter-entity references may only appear in the DTD.
6663 * NOTE: misleading but this is handled.
6664 */
6665void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006666xmlParsePEReference(xmlParserCtxtPtr ctxt)
6667{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006668 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006669 xmlEntityPtr entity = NULL;
6670 xmlParserInputPtr input;
6671
6672 if (RAW == '%') {
6673 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006674 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006675 if (name == NULL) {
6676 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6677 "xmlParsePEReference: no name\n");
6678 } else {
6679 if (RAW == ';') {
6680 NEXT;
6681 if ((ctxt->sax != NULL) &&
6682 (ctxt->sax->getParameterEntity != NULL))
6683 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6684 name);
6685 if (entity == NULL) {
6686 /*
6687 * [ WFC: Entity Declared ]
6688 * In a document without any DTD, a document with only an
6689 * internal DTD subset which contains no parameter entity
6690 * references, or a document with "standalone='yes'", ...
6691 * ... The declaration of a parameter entity must precede
6692 * any reference to it...
6693 */
6694 if ((ctxt->standalone == 1) ||
6695 ((ctxt->hasExternalSubset == 0) &&
6696 (ctxt->hasPErefs == 0))) {
6697 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6698 "PEReference: %%%s; not found\n",
6699 name);
6700 } else {
6701 /*
6702 * [ VC: Entity Declared ]
6703 * In a document with an external subset or external
6704 * parameter entities with "standalone='no'", ...
6705 * ... The declaration of a parameter entity must
6706 * precede any reference to it...
6707 */
6708 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6709 "PEReference: %%%s; not found\n",
6710 name, NULL);
6711 ctxt->valid = 0;
6712 }
6713 } else {
6714 /*
6715 * Internal checking in case the entity quest barfed
6716 */
6717 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6718 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6719 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6720 "Internal: %%%s; is not a parameter entity\n",
6721 name, NULL);
6722 } else if (ctxt->input->free != deallocblankswrapper) {
6723 input =
6724 xmlNewBlanksWrapperInputStream(ctxt, entity);
6725 xmlPushInput(ctxt, input);
6726 } else {
6727 /*
6728 * TODO !!!
6729 * handle the extra spaces added before and after
6730 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6731 */
6732 input = xmlNewEntityInputStream(ctxt, entity);
6733 xmlPushInput(ctxt, input);
6734 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006735 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006736 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006737 xmlParseTextDecl(ctxt);
6738 if (ctxt->errNo ==
6739 XML_ERR_UNSUPPORTED_ENCODING) {
6740 /*
6741 * The XML REC instructs us to stop parsing
6742 * right here
6743 */
6744 ctxt->instate = XML_PARSER_EOF;
6745 return;
6746 }
6747 }
6748 }
6749 }
6750 ctxt->hasPErefs = 1;
6751 } else {
6752 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6753 }
6754 }
Owen Taylor3473f882001-02-23 17:55:21 +00006755 }
6756}
6757
6758/**
6759 * xmlParseStringPEReference:
6760 * @ctxt: an XML parser context
6761 * @str: a pointer to an index in the string
6762 *
6763 * parse PEReference declarations
6764 *
6765 * [69] PEReference ::= '%' Name ';'
6766 *
6767 * [ WFC: No Recursion ]
6768 * A parsed entity must not contain a recursive
6769 * reference to itself, either directly or indirectly.
6770 *
6771 * [ WFC: Entity Declared ]
6772 * In a document without any DTD, a document with only an internal DTD
6773 * subset which contains no parameter entity references, or a document
6774 * with "standalone='yes'", ... ... The declaration of a parameter
6775 * entity must precede any reference to it...
6776 *
6777 * [ VC: Entity Declared ]
6778 * In a document with an external subset or external parameter entities
6779 * with "standalone='no'", ... ... The declaration of a parameter entity
6780 * must precede any reference to it...
6781 *
6782 * [ WFC: In DTD ]
6783 * Parameter-entity references may only appear in the DTD.
6784 * NOTE: misleading but this is handled.
6785 *
6786 * Returns the string of the entity content.
6787 * str is updated to the current value of the index
6788 */
6789xmlEntityPtr
6790xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6791 const xmlChar *ptr;
6792 xmlChar cur;
6793 xmlChar *name;
6794 xmlEntityPtr entity = NULL;
6795
6796 if ((str == NULL) || (*str == NULL)) return(NULL);
6797 ptr = *str;
6798 cur = *ptr;
6799 if (cur == '%') {
6800 ptr++;
6801 cur = *ptr;
6802 name = xmlParseStringName(ctxt, &ptr);
6803 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006804 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6805 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006806 } else {
6807 cur = *ptr;
6808 if (cur == ';') {
6809 ptr++;
6810 cur = *ptr;
6811 if ((ctxt->sax != NULL) &&
6812 (ctxt->sax->getParameterEntity != NULL))
6813 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6814 name);
6815 if (entity == NULL) {
6816 /*
6817 * [ WFC: Entity Declared ]
6818 * In a document without any DTD, a document with only an
6819 * internal DTD subset which contains no parameter entity
6820 * references, or a document with "standalone='yes'", ...
6821 * ... The declaration of a parameter entity must precede
6822 * any reference to it...
6823 */
6824 if ((ctxt->standalone == 1) ||
6825 ((ctxt->hasExternalSubset == 0) &&
6826 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006827 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006828 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006829 } else {
6830 /*
6831 * [ VC: Entity Declared ]
6832 * In a document with an external subset or external
6833 * parameter entities with "standalone='no'", ...
6834 * ... The declaration of a parameter entity must
6835 * precede any reference to it...
6836 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006837 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6838 "PEReference: %%%s; not found\n",
6839 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006840 ctxt->valid = 0;
6841 }
6842 } else {
6843 /*
6844 * Internal checking in case the entity quest barfed
6845 */
6846 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6847 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006848 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6849 "%%%s; is not a parameter entity\n",
6850 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006851 }
6852 }
6853 ctxt->hasPErefs = 1;
6854 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006855 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006856 }
6857 xmlFree(name);
6858 }
6859 }
6860 *str = ptr;
6861 return(entity);
6862}
6863
6864/**
6865 * xmlParseDocTypeDecl:
6866 * @ctxt: an XML parser context
6867 *
6868 * parse a DOCTYPE declaration
6869 *
6870 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6871 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6872 *
6873 * [ VC: Root Element Type ]
6874 * The Name in the document type declaration must match the element
6875 * type of the root element.
6876 */
6877
6878void
6879xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006880 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006881 xmlChar *ExternalID = NULL;
6882 xmlChar *URI = NULL;
6883
6884 /*
6885 * We know that '<!DOCTYPE' has been detected.
6886 */
6887 SKIP(9);
6888
6889 SKIP_BLANKS;
6890
6891 /*
6892 * Parse the DOCTYPE name.
6893 */
6894 name = xmlParseName(ctxt);
6895 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006896 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6897 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006898 }
6899 ctxt->intSubName = name;
6900
6901 SKIP_BLANKS;
6902
6903 /*
6904 * Check for SystemID and ExternalID
6905 */
6906 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6907
6908 if ((URI != NULL) || (ExternalID != NULL)) {
6909 ctxt->hasExternalSubset = 1;
6910 }
6911 ctxt->extSubURI = URI;
6912 ctxt->extSubSystem = ExternalID;
6913
6914 SKIP_BLANKS;
6915
6916 /*
6917 * Create and update the internal subset.
6918 */
6919 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6920 (!ctxt->disableSAX))
6921 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6922
6923 /*
6924 * Is there any internal subset declarations ?
6925 * they are handled separately in xmlParseInternalSubset()
6926 */
6927 if (RAW == '[')
6928 return;
6929
6930 /*
6931 * We should be at the end of the DOCTYPE declaration.
6932 */
6933 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006934 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006935 }
6936 NEXT;
6937}
6938
6939/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006940 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006941 * @ctxt: an XML parser context
6942 *
6943 * parse the internal subset declaration
6944 *
6945 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6946 */
6947
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006948static void
Owen Taylor3473f882001-02-23 17:55:21 +00006949xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6950 /*
6951 * Is there any DTD definition ?
6952 */
6953 if (RAW == '[') {
6954 ctxt->instate = XML_PARSER_DTD;
6955 NEXT;
6956 /*
6957 * Parse the succession of Markup declarations and
6958 * PEReferences.
6959 * Subsequence (markupdecl | PEReference | S)*
6960 */
6961 while (RAW != ']') {
6962 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006963 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006964
6965 SKIP_BLANKS;
6966 xmlParseMarkupDecl(ctxt);
6967 xmlParsePEReference(ctxt);
6968
6969 /*
6970 * Pop-up of finished entities.
6971 */
6972 while ((RAW == 0) && (ctxt->inputNr > 1))
6973 xmlPopInput(ctxt);
6974
6975 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006976 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006977 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006978 break;
6979 }
6980 }
6981 if (RAW == ']') {
6982 NEXT;
6983 SKIP_BLANKS;
6984 }
6985 }
6986
6987 /*
6988 * We should be at the end of the DOCTYPE declaration.
6989 */
6990 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006991 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006992 }
6993 NEXT;
6994}
6995
Daniel Veillard81273902003-09-30 00:43:48 +00006996#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006997/**
6998 * xmlParseAttribute:
6999 * @ctxt: an XML parser context
7000 * @value: a xmlChar ** used to store the value of the attribute
7001 *
7002 * parse an attribute
7003 *
7004 * [41] Attribute ::= Name Eq AttValue
7005 *
7006 * [ WFC: No External Entity References ]
7007 * Attribute values cannot contain direct or indirect entity references
7008 * to external entities.
7009 *
7010 * [ WFC: No < in Attribute Values ]
7011 * The replacement text of any entity referred to directly or indirectly in
7012 * an attribute value (other than "&lt;") must not contain a <.
7013 *
7014 * [ VC: Attribute Value Type ]
7015 * The attribute must have been declared; the value must be of the type
7016 * declared for it.
7017 *
7018 * [25] Eq ::= S? '=' S?
7019 *
7020 * With namespace:
7021 *
7022 * [NS 11] Attribute ::= QName Eq AttValue
7023 *
7024 * Also the case QName == xmlns:??? is handled independently as a namespace
7025 * definition.
7026 *
7027 * Returns the attribute name, and the value in *value.
7028 */
7029
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007030const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007031xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007032 const xmlChar *name;
7033 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007034
7035 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007036 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007037 name = xmlParseName(ctxt);
7038 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007039 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007040 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007041 return(NULL);
7042 }
7043
7044 /*
7045 * read the value
7046 */
7047 SKIP_BLANKS;
7048 if (RAW == '=') {
7049 NEXT;
7050 SKIP_BLANKS;
7051 val = xmlParseAttValue(ctxt);
7052 ctxt->instate = XML_PARSER_CONTENT;
7053 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007054 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007055 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007056 return(NULL);
7057 }
7058
7059 /*
7060 * Check that xml:lang conforms to the specification
7061 * No more registered as an error, just generate a warning now
7062 * since this was deprecated in XML second edition
7063 */
7064 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7065 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007066 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7067 "Malformed value for xml:lang : %s\n",
7068 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007069 }
7070 }
7071
7072 /*
7073 * Check that xml:space conforms to the specification
7074 */
7075 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7076 if (xmlStrEqual(val, BAD_CAST "default"))
7077 *(ctxt->space) = 0;
7078 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7079 *(ctxt->space) = 1;
7080 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007081 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007082"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007083 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007084 }
7085 }
7086
7087 *value = val;
7088 return(name);
7089}
7090
7091/**
7092 * xmlParseStartTag:
7093 * @ctxt: an XML parser context
7094 *
7095 * parse a start of tag either for rule element or
7096 * EmptyElement. In both case we don't parse the tag closing chars.
7097 *
7098 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7099 *
7100 * [ WFC: Unique Att Spec ]
7101 * No attribute name may appear more than once in the same start-tag or
7102 * empty-element tag.
7103 *
7104 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7105 *
7106 * [ WFC: Unique Att Spec ]
7107 * No attribute name may appear more than once in the same start-tag or
7108 * empty-element tag.
7109 *
7110 * With namespace:
7111 *
7112 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7113 *
7114 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7115 *
7116 * Returns the element name parsed
7117 */
7118
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007119const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007120xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007121 const xmlChar *name;
7122 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007123 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007124 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007125 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007126 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 int i;
7128
7129 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007130 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007131
7132 name = xmlParseName(ctxt);
7133 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007134 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007135 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007136 return(NULL);
7137 }
7138
7139 /*
7140 * Now parse the attributes, it ends up with the ending
7141 *
7142 * (S Attribute)* S?
7143 */
7144 SKIP_BLANKS;
7145 GROW;
7146
Daniel Veillard21a0f912001-02-25 19:54:14 +00007147 while ((RAW != '>') &&
7148 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007149 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007150 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007151 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007152
7153 attname = xmlParseAttribute(ctxt, &attvalue);
7154 if ((attname != NULL) && (attvalue != NULL)) {
7155 /*
7156 * [ WFC: Unique Att Spec ]
7157 * No attribute name may appear more than once in the same
7158 * start-tag or empty-element tag.
7159 */
7160 for (i = 0; i < nbatts;i += 2) {
7161 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007162 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007163 xmlFree(attvalue);
7164 goto failed;
7165 }
7166 }
Owen Taylor3473f882001-02-23 17:55:21 +00007167 /*
7168 * Add the pair to atts
7169 */
7170 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007171 maxatts = 22; /* allow for 10 attrs by default */
7172 atts = (const xmlChar **)
7173 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007174 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007175 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007176 if (attvalue != NULL)
7177 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007178 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007179 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007180 ctxt->atts = atts;
7181 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007182 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007183 const xmlChar **n;
7184
Owen Taylor3473f882001-02-23 17:55:21 +00007185 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007186 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007187 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007188 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007189 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007190 if (attvalue != NULL)
7191 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007193 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007194 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007195 ctxt->atts = atts;
7196 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007197 }
7198 atts[nbatts++] = attname;
7199 atts[nbatts++] = attvalue;
7200 atts[nbatts] = NULL;
7201 atts[nbatts + 1] = NULL;
7202 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007203 if (attvalue != NULL)
7204 xmlFree(attvalue);
7205 }
7206
7207failed:
7208
Daniel Veillard3772de32002-12-17 10:31:45 +00007209 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007210 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7211 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007212 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007213 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7214 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007215 }
7216 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007217 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7218 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007219 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7220 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007221 break;
7222 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007223 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007224 GROW;
7225 }
7226
7227 /*
7228 * SAX: Start of Element !
7229 */
7230 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007231 (!ctxt->disableSAX)) {
7232 if (nbatts > 0)
7233 ctxt->sax->startElement(ctxt->userData, name, atts);
7234 else
7235 ctxt->sax->startElement(ctxt->userData, name, NULL);
7236 }
Owen Taylor3473f882001-02-23 17:55:21 +00007237
7238 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007239 /* Free only the content strings */
7240 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007241 if (atts[i] != NULL)
7242 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007243 }
7244 return(name);
7245}
7246
7247/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007248 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007249 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250 * @line: line of the start tag
7251 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007252 *
7253 * parse an end of tag
7254 *
7255 * [42] ETag ::= '</' Name S? '>'
7256 *
7257 * With namespace
7258 *
7259 * [NS 9] ETag ::= '</' QName S? '>'
7260 */
7261
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007262static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007263xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007264 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007265
7266 GROW;
7267 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007268 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007269 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007270 return;
7271 }
7272 SKIP(2);
7273
Daniel Veillard46de64e2002-05-29 08:21:33 +00007274 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007275
7276 /*
7277 * We should definitely be at the ending "S? '>'" part
7278 */
7279 GROW;
7280 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007281 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007282 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007283 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007284 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007285
7286 /*
7287 * [ WFC: Element Type Match ]
7288 * The Name in an element's end-tag must match the element type in the
7289 * start-tag.
7290 *
7291 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007292 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007293 if (name == NULL) name = BAD_CAST "unparseable";
7294 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007295 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007296 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007297 }
7298
7299 /*
7300 * SAX: End of Tag
7301 */
7302 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7303 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007304 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007305
Daniel Veillarde57ec792003-09-10 10:50:59 +00007306 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007307 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007308 return;
7309}
7310
7311/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007312 * xmlParseEndTag:
7313 * @ctxt: an XML parser context
7314 *
7315 * parse an end of tag
7316 *
7317 * [42] ETag ::= '</' Name S? '>'
7318 *
7319 * With namespace
7320 *
7321 * [NS 9] ETag ::= '</' QName S? '>'
7322 */
7323
7324void
7325xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007326 xmlParseEndTag1(ctxt, 0);
7327}
Daniel Veillard81273902003-09-30 00:43:48 +00007328#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007329
7330/************************************************************************
7331 * *
7332 * SAX 2 specific operations *
7333 * *
7334 ************************************************************************/
7335
7336static const xmlChar *
7337xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7338 int len = 0, l;
7339 int c;
7340 int count = 0;
7341
7342 /*
7343 * Handler for more complex cases
7344 */
7345 GROW;
7346 c = CUR_CHAR(l);
7347 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007348 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007349 return(NULL);
7350 }
7351
7352 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007353 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007354 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007355 (IS_COMBINING(c)) ||
7356 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007357 if (count++ > 100) {
7358 count = 0;
7359 GROW;
7360 }
7361 len += l;
7362 NEXTL(l);
7363 c = CUR_CHAR(l);
7364 }
7365 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7366}
7367
7368/*
7369 * xmlGetNamespace:
7370 * @ctxt: an XML parser context
7371 * @prefix: the prefix to lookup
7372 *
7373 * Lookup the namespace name for the @prefix (which ca be NULL)
7374 * The prefix must come from the @ctxt->dict dictionnary
7375 *
7376 * Returns the namespace name or NULL if not bound
7377 */
7378static const xmlChar *
7379xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7380 int i;
7381
Daniel Veillarde57ec792003-09-10 10:50:59 +00007382 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007383 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 if (ctxt->nsTab[i] == prefix) {
7385 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7386 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007388 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 return(NULL);
7390}
7391
7392/**
7393 * xmlParseNCName:
7394 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007395 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007396 *
7397 * parse an XML name.
7398 *
7399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7400 * CombiningChar | Extender
7401 *
7402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7403 *
7404 * Returns the Name parsed or NULL
7405 */
7406
7407static const xmlChar *
7408xmlParseNCName(xmlParserCtxtPtr ctxt) {
7409 const xmlChar *in;
7410 const xmlChar *ret;
7411 int count = 0;
7412
7413 /*
7414 * Accelerator for simple ASCII names
7415 */
7416 in = ctxt->input->cur;
7417 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7418 ((*in >= 0x41) && (*in <= 0x5A)) ||
7419 (*in == '_')) {
7420 in++;
7421 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7422 ((*in >= 0x41) && (*in <= 0x5A)) ||
7423 ((*in >= 0x30) && (*in <= 0x39)) ||
7424 (*in == '_') || (*in == '-') ||
7425 (*in == '.'))
7426 in++;
7427 if ((*in > 0) && (*in < 0x80)) {
7428 count = in - ctxt->input->cur;
7429 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7430 ctxt->input->cur = in;
7431 ctxt->nbChars += count;
7432 ctxt->input->col += count;
7433 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007434 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 }
7436 return(ret);
7437 }
7438 }
7439 return(xmlParseNCNameComplex(ctxt));
7440}
7441
7442/**
7443 * xmlParseQName:
7444 * @ctxt: an XML parser context
7445 * @prefix: pointer to store the prefix part
7446 *
7447 * parse an XML Namespace QName
7448 *
7449 * [6] QName ::= (Prefix ':')? LocalPart
7450 * [7] Prefix ::= NCName
7451 * [8] LocalPart ::= NCName
7452 *
7453 * Returns the Name parsed or NULL
7454 */
7455
7456static const xmlChar *
7457xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7458 const xmlChar *l, *p;
7459
7460 GROW;
7461
7462 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007463 if (l == NULL) {
7464 if (CUR == ':') {
7465 l = xmlParseName(ctxt);
7466 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007467 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7468 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007469 *prefix = NULL;
7470 return(l);
7471 }
7472 }
7473 return(NULL);
7474 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007475 if (CUR == ':') {
7476 NEXT;
7477 p = l;
7478 l = xmlParseNCName(ctxt);
7479 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007480 xmlChar *tmp;
7481
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007482 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7483 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007484 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7485 p = xmlDictLookup(ctxt->dict, tmp, -1);
7486 if (tmp != NULL) xmlFree(tmp);
7487 *prefix = NULL;
7488 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007489 }
7490 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007491 xmlChar *tmp;
7492
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007493 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7494 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007495 NEXT;
7496 tmp = (xmlChar *) xmlParseName(ctxt);
7497 if (tmp != NULL) {
7498 tmp = xmlBuildQName(tmp, l, NULL, 0);
7499 l = xmlDictLookup(ctxt->dict, tmp, -1);
7500 if (tmp != NULL) xmlFree(tmp);
7501 *prefix = p;
7502 return(l);
7503 }
7504 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7505 l = xmlDictLookup(ctxt->dict, tmp, -1);
7506 if (tmp != NULL) xmlFree(tmp);
7507 *prefix = p;
7508 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007509 }
7510 *prefix = p;
7511 } else
7512 *prefix = NULL;
7513 return(l);
7514}
7515
7516/**
7517 * xmlParseQNameAndCompare:
7518 * @ctxt: an XML parser context
7519 * @name: the localname
7520 * @prefix: the prefix, if any.
7521 *
7522 * parse an XML name and compares for match
7523 * (specialized for endtag parsing)
7524 *
7525 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7526 * and the name for mismatch
7527 */
7528
7529static const xmlChar *
7530xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7531 xmlChar const *prefix) {
7532 const xmlChar *cmp = name;
7533 const xmlChar *in;
7534 const xmlChar *ret;
7535 const xmlChar *prefix2;
7536
7537 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7538
7539 GROW;
7540 in = ctxt->input->cur;
7541
7542 cmp = prefix;
7543 while (*in != 0 && *in == *cmp) {
7544 ++in;
7545 ++cmp;
7546 }
7547 if ((*cmp == 0) && (*in == ':')) {
7548 in++;
7549 cmp = name;
7550 while (*in != 0 && *in == *cmp) {
7551 ++in;
7552 ++cmp;
7553 }
William M. Brack76e95df2003-10-18 16:20:14 +00007554 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007555 /* success */
7556 ctxt->input->cur = in;
7557 return((const xmlChar*) 1);
7558 }
7559 }
7560 /*
7561 * all strings coms from the dictionary, equality can be done directly
7562 */
7563 ret = xmlParseQName (ctxt, &prefix2);
7564 if ((ret == name) && (prefix == prefix2))
7565 return((const xmlChar*) 1);
7566 return ret;
7567}
7568
7569/**
7570 * xmlParseAttValueInternal:
7571 * @ctxt: an XML parser context
7572 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007573 * @alloc: whether the attribute was reallocated as a new string
7574 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007575 *
7576 * parse a value for an attribute.
7577 * NOTE: if no normalization is needed, the routine will return pointers
7578 * directly from the data buffer.
7579 *
7580 * 3.3.3 Attribute-Value Normalization:
7581 * Before the value of an attribute is passed to the application or
7582 * checked for validity, the XML processor must normalize it as follows:
7583 * - a character reference is processed by appending the referenced
7584 * character to the attribute value
7585 * - an entity reference is processed by recursively processing the
7586 * replacement text of the entity
7587 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7588 * appending #x20 to the normalized value, except that only a single
7589 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7590 * parsed entity or the literal entity value of an internal parsed entity
7591 * - other characters are processed by appending them to the normalized value
7592 * If the declared value is not CDATA, then the XML processor must further
7593 * process the normalized attribute value by discarding any leading and
7594 * trailing space (#x20) characters, and by replacing sequences of space
7595 * (#x20) characters by a single space (#x20) character.
7596 * All attributes for which no declaration has been read should be treated
7597 * by a non-validating parser as if declared CDATA.
7598 *
7599 * Returns the AttValue parsed or NULL. The value has to be freed by the
7600 * caller if it was copied, this can be detected by val[*len] == 0.
7601 */
7602
7603static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007604xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7605 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007606{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007607 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007608 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 xmlChar *ret = NULL;
7610
7611 GROW;
7612 in = (xmlChar *) CUR_PTR;
7613 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007614 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007615 return (NULL);
7616 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007617 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007618
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007619 /*
7620 * try to handle in this routine the most common case where no
7621 * allocation of a new string is required and where content is
7622 * pure ASCII.
7623 */
7624 limit = *in++;
7625 end = ctxt->input->end;
7626 start = in;
7627 if (in >= end) {
7628 const xmlChar *oldbase = ctxt->input->base;
7629 GROW;
7630 if (oldbase != ctxt->input->base) {
7631 long delta = ctxt->input->base - oldbase;
7632 start = start + delta;
7633 in = in + delta;
7634 }
7635 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007636 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007637 if (normalize) {
7638 /*
7639 * Skip any leading spaces
7640 */
7641 while ((in < end) && (*in != limit) &&
7642 ((*in == 0x20) || (*in == 0x9) ||
7643 (*in == 0xA) || (*in == 0xD))) {
7644 in++;
7645 start = in;
7646 if (in >= end) {
7647 const xmlChar *oldbase = ctxt->input->base;
7648 GROW;
7649 if (oldbase != ctxt->input->base) {
7650 long delta = ctxt->input->base - oldbase;
7651 start = start + delta;
7652 in = in + delta;
7653 }
7654 end = ctxt->input->end;
7655 }
7656 }
7657 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7658 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7659 if ((*in++ == 0x20) && (*in == 0x20)) break;
7660 if (in >= end) {
7661 const xmlChar *oldbase = ctxt->input->base;
7662 GROW;
7663 if (oldbase != ctxt->input->base) {
7664 long delta = ctxt->input->base - oldbase;
7665 start = start + delta;
7666 in = in + delta;
7667 }
7668 end = ctxt->input->end;
7669 }
7670 }
7671 last = in;
7672 /*
7673 * skip the trailing blanks
7674 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007675 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007676 while ((in < end) && (*in != limit) &&
7677 ((*in == 0x20) || (*in == 0x9) ||
7678 (*in == 0xA) || (*in == 0xD))) {
7679 in++;
7680 if (in >= end) {
7681 const xmlChar *oldbase = ctxt->input->base;
7682 GROW;
7683 if (oldbase != ctxt->input->base) {
7684 long delta = ctxt->input->base - oldbase;
7685 start = start + delta;
7686 in = in + delta;
7687 last = last + delta;
7688 }
7689 end = ctxt->input->end;
7690 }
7691 }
7692 if (*in != limit) goto need_complex;
7693 } else {
7694 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7695 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7696 in++;
7697 if (in >= end) {
7698 const xmlChar *oldbase = ctxt->input->base;
7699 GROW;
7700 if (oldbase != ctxt->input->base) {
7701 long delta = ctxt->input->base - oldbase;
7702 start = start + delta;
7703 in = in + delta;
7704 }
7705 end = ctxt->input->end;
7706 }
7707 }
7708 last = in;
7709 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007710 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007711 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007713 *len = last - start;
7714 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007716 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007717 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007718 }
7719 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007720 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007722need_complex:
7723 if (alloc) *alloc = 1;
7724 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725}
7726
7727/**
7728 * xmlParseAttribute2:
7729 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007730 * @pref: the element prefix
7731 * @elem: the element name
7732 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007734 * @len: an int * to save the length of the attribute
7735 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 *
7737 * parse an attribute in the new SAX2 framework.
7738 *
7739 * Returns the attribute name, and the value in *value, .
7740 */
7741
7742static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007743xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7744 const xmlChar *pref, const xmlChar *elem,
7745 const xmlChar **prefix, xmlChar **value,
7746 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007747 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007748 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007749 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750
7751 *value = NULL;
7752 GROW;
7753 name = xmlParseQName(ctxt, prefix);
7754 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007755 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7756 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757 return(NULL);
7758 }
7759
7760 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007761 * get the type if needed
7762 */
7763 if (ctxt->attsSpecial != NULL) {
7764 int type;
7765
7766 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7767 pref, elem, *prefix, name);
7768 if (type != 0) normalize = 1;
7769 }
7770
7771 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007772 * read the value
7773 */
7774 SKIP_BLANKS;
7775 if (RAW == '=') {
7776 NEXT;
7777 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007778 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007779 ctxt->instate = XML_PARSER_CONTENT;
7780 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007781 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007783 return(NULL);
7784 }
7785
Daniel Veillardd8925572005-06-08 22:34:55 +00007786 if (*prefix == ctxt->str_xml) {
7787 /*
7788 * Check that xml:lang conforms to the specification
7789 * No more registered as an error, just generate a warning now
7790 * since this was deprecated in XML second edition
7791 */
7792 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7793 internal_val = xmlStrndup(val, *len);
7794 if (!xmlCheckLanguageID(internal_val)) {
7795 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7796 "Malformed value for xml:lang : %s\n",
7797 internal_val, NULL);
7798 }
7799 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007800
Daniel Veillardd8925572005-06-08 22:34:55 +00007801 /*
7802 * Check that xml:space conforms to the specification
7803 */
7804 if (xmlStrEqual(name, BAD_CAST "space")) {
7805 internal_val = xmlStrndup(val, *len);
7806 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7807 *(ctxt->space) = 0;
7808 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7809 *(ctxt->space) = 1;
7810 else {
7811 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007813 internal_val, NULL);
7814 }
7815 }
7816 if (internal_val) {
7817 xmlFree(internal_val);
7818 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007819 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007820
7821 *value = val;
7822 return(name);
7823}
7824
7825/**
7826 * xmlParseStartTag2:
7827 * @ctxt: an XML parser context
7828 *
7829 * parse a start of tag either for rule element or
7830 * EmptyElement. In both case we don't parse the tag closing chars.
7831 * This routine is called when running SAX2 parsing
7832 *
7833 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7834 *
7835 * [ WFC: Unique Att Spec ]
7836 * No attribute name may appear more than once in the same start-tag or
7837 * empty-element tag.
7838 *
7839 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7840 *
7841 * [ WFC: Unique Att Spec ]
7842 * No attribute name may appear more than once in the same start-tag or
7843 * empty-element tag.
7844 *
7845 * With namespace:
7846 *
7847 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7848 *
7849 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7850 *
7851 * Returns the element name parsed
7852 */
7853
7854static const xmlChar *
7855xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007856 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007857 const xmlChar *localname;
7858 const xmlChar *prefix;
7859 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007860 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007861 const xmlChar *nsname;
7862 xmlChar *attvalue;
7863 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007864 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007865 int nratts, nbatts, nbdef;
7866 int i, j, nbNs, attval;
7867 const xmlChar *base;
7868 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007869 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007870
7871 if (RAW != '<') return(NULL);
7872 NEXT1;
7873
7874 /*
7875 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7876 * point since the attribute values may be stored as pointers to
7877 * the buffer and calling SHRINK would destroy them !
7878 * The Shrinking is only possible once the full set of attribute
7879 * callbacks have been done.
7880 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007881reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007882 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883 base = ctxt->input->base;
7884 cur = ctxt->input->cur - ctxt->input->base;
7885 nbatts = 0;
7886 nratts = 0;
7887 nbdef = 0;
7888 nbNs = 0;
7889 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007890 /* Forget any namespaces added during an earlier parse of this element. */
7891 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007892
7893 localname = xmlParseQName(ctxt, &prefix);
7894 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007895 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7896 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007897 return(NULL);
7898 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007899 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900
7901 /*
7902 * Now parse the attributes, it ends up with the ending
7903 *
7904 * (S Attribute)* S?
7905 */
7906 SKIP_BLANKS;
7907 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007908 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007909
7910 while ((RAW != '>') &&
7911 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007912 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913 const xmlChar *q = CUR_PTR;
7914 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007915 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007916
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007917 attname = xmlParseAttribute2(ctxt, prefix, localname,
7918 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007919 if ((attname != NULL) && (attvalue != NULL)) {
7920 if (len < 0) len = xmlStrlen(attvalue);
7921 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007922 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7923 xmlURIPtr uri;
7924
7925 if (*URL != 0) {
7926 uri = xmlParseURI((const char *) URL);
7927 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007928 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7929 "xmlns: %s not a valid URI\n",
7930 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007931 } else {
7932 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007933 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7934 "xmlns: URI %s is not absolute\n",
7935 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007936 }
7937 xmlFreeURI(uri);
7938 }
7939 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007941 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007943 for (j = 1;j <= nbNs;j++)
7944 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7945 break;
7946 if (j <= nbNs)
7947 xmlErrAttributeDup(ctxt, NULL, attname);
7948 else
7949 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007950 if (alloc != 0) xmlFree(attvalue);
7951 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007952 continue;
7953 }
7954 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007955 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7956 xmlURIPtr uri;
7957
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007958 if (attname == ctxt->str_xml) {
7959 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007960 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7961 "xml namespace prefix mapped to wrong URI\n",
7962 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007963 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007964 /*
7965 * Do not keep a namespace definition node
7966 */
7967 if (alloc != 0) xmlFree(attvalue);
7968 SKIP_BLANKS;
7969 continue;
7970 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007971 uri = xmlParseURI((const char *) URL);
7972 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007973 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7974 "xmlns:%s: '%s' is not a valid URI\n",
7975 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007976 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007977 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007978 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7979 "xmlns:%s: URI %s is not absolute\n",
7980 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007981 }
7982 xmlFreeURI(uri);
7983 }
7984
Daniel Veillard0fb18932003-09-07 09:14:37 +00007985 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007986 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007988 for (j = 1;j <= nbNs;j++)
7989 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7990 break;
7991 if (j <= nbNs)
7992 xmlErrAttributeDup(ctxt, aprefix, attname);
7993 else
7994 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007995 if (alloc != 0) xmlFree(attvalue);
7996 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007997 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007998 continue;
7999 }
8000
8001 /*
8002 * Add the pair to atts
8003 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008004 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8005 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008006 if (attvalue[len] == 0)
8007 xmlFree(attvalue);
8008 goto failed;
8009 }
8010 maxatts = ctxt->maxatts;
8011 atts = ctxt->atts;
8012 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008013 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008014 atts[nbatts++] = attname;
8015 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008016 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017 atts[nbatts++] = attvalue;
8018 attvalue += len;
8019 atts[nbatts++] = attvalue;
8020 /*
8021 * tag if some deallocation is needed
8022 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008023 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008024 } else {
8025 if ((attvalue != NULL) && (attvalue[len] == 0))
8026 xmlFree(attvalue);
8027 }
8028
8029failed:
8030
8031 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008032 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008033 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8034 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008035 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008036 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8037 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008038 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008039 }
8040 SKIP_BLANKS;
8041 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8042 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008043 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008044 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008045 break;
8046 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008047 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008048 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 }
8050
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008052 * The attributes defaulting
8053 */
8054 if (ctxt->attsDefault != NULL) {
8055 xmlDefAttrsPtr defaults;
8056
8057 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8058 if (defaults != NULL) {
8059 for (i = 0;i < defaults->nbAttrs;i++) {
8060 attname = defaults->values[4 * i];
8061 aprefix = defaults->values[4 * i + 1];
8062
8063 /*
8064 * special work for namespaces defaulted defs
8065 */
8066 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8067 /*
8068 * check that it's not a defined namespace
8069 */
8070 for (j = 1;j <= nbNs;j++)
8071 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8072 break;
8073 if (j <= nbNs) continue;
8074
8075 nsname = xmlGetNamespace(ctxt, NULL);
8076 if (nsname != defaults->values[4 * i + 2]) {
8077 if (nsPush(ctxt, NULL,
8078 defaults->values[4 * i + 2]) > 0)
8079 nbNs++;
8080 }
8081 } else if (aprefix == ctxt->str_xmlns) {
8082 /*
8083 * check that it's not a defined namespace
8084 */
8085 for (j = 1;j <= nbNs;j++)
8086 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8087 break;
8088 if (j <= nbNs) continue;
8089
8090 nsname = xmlGetNamespace(ctxt, attname);
8091 if (nsname != defaults->values[2]) {
8092 if (nsPush(ctxt, attname,
8093 defaults->values[4 * i + 2]) > 0)
8094 nbNs++;
8095 }
8096 } else {
8097 /*
8098 * check that it's not a defined attribute
8099 */
8100 for (j = 0;j < nbatts;j+=5) {
8101 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8102 break;
8103 }
8104 if (j < nbatts) continue;
8105
8106 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8107 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008108 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008109 }
8110 maxatts = ctxt->maxatts;
8111 atts = ctxt->atts;
8112 }
8113 atts[nbatts++] = attname;
8114 atts[nbatts++] = aprefix;
8115 if (aprefix == NULL)
8116 atts[nbatts++] = NULL;
8117 else
8118 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8119 atts[nbatts++] = defaults->values[4 * i + 2];
8120 atts[nbatts++] = defaults->values[4 * i + 3];
8121 nbdef++;
8122 }
8123 }
8124 }
8125 }
8126
Daniel Veillarde70c8772003-11-25 07:21:18 +00008127 /*
8128 * The attributes checkings
8129 */
8130 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008131 /*
8132 * The default namespace does not apply to attribute names.
8133 */
8134 if (atts[i + 1] != NULL) {
8135 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8136 if (nsname == NULL) {
8137 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8138 "Namespace prefix %s for %s on %s is not defined\n",
8139 atts[i + 1], atts[i], localname);
8140 }
8141 atts[i + 2] = nsname;
8142 } else
8143 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008144 /*
8145 * [ WFC: Unique Att Spec ]
8146 * No attribute name may appear more than once in the same
8147 * start-tag or empty-element tag.
8148 * As extended by the Namespace in XML REC.
8149 */
8150 for (j = 0; j < i;j += 5) {
8151 if (atts[i] == atts[j]) {
8152 if (atts[i+1] == atts[j+1]) {
8153 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8154 break;
8155 }
8156 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8157 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8158 "Namespaced Attribute %s in '%s' redefined\n",
8159 atts[i], nsname, NULL);
8160 break;
8161 }
8162 }
8163 }
8164 }
8165
Daniel Veillarde57ec792003-09-10 10:50:59 +00008166 nsname = xmlGetNamespace(ctxt, prefix);
8167 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008168 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8169 "Namespace prefix %s on %s is not defined\n",
8170 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008171 }
8172 *pref = prefix;
8173 *URI = nsname;
8174
8175 /*
8176 * SAX: Start of Element !
8177 */
8178 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8179 (!ctxt->disableSAX)) {
8180 if (nbNs > 0)
8181 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8182 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8183 nbatts / 5, nbdef, atts);
8184 else
8185 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8186 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8187 }
8188
8189 /*
8190 * Free up attribute allocated strings if needed
8191 */
8192 if (attval != 0) {
8193 for (i = 3,j = 0; j < nratts;i += 5,j++)
8194 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8195 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196 }
8197
8198 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008199
8200base_changed:
8201 /*
8202 * the attribute strings are valid iif the base didn't changed
8203 */
8204 if (attval != 0) {
8205 for (i = 3,j = 0; j < nratts;i += 5,j++)
8206 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8207 xmlFree((xmlChar *) atts[i]);
8208 }
8209 ctxt->input->cur = ctxt->input->base + cur;
8210 if (ctxt->wellFormed == 1) {
8211 goto reparse;
8212 }
8213 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008214}
8215
8216/**
8217 * xmlParseEndTag2:
8218 * @ctxt: an XML parser context
8219 * @line: line of the start tag
8220 * @nsNr: number of namespaces on the start tag
8221 *
8222 * parse an end of tag
8223 *
8224 * [42] ETag ::= '</' Name S? '>'
8225 *
8226 * With namespace
8227 *
8228 * [NS 9] ETag ::= '</' QName S? '>'
8229 */
8230
8231static void
8232xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008233 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234 const xmlChar *name;
8235
8236 GROW;
8237 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008238 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008239 return;
8240 }
8241 SKIP(2);
8242
William M. Brack13dfa872004-09-18 04:52:08 +00008243 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008244 if (ctxt->input->cur[tlen] == '>') {
8245 ctxt->input->cur += tlen + 1;
8246 goto done;
8247 }
8248 ctxt->input->cur += tlen;
8249 name = (xmlChar*)1;
8250 } else {
8251 if (prefix == NULL)
8252 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8253 else
8254 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8255 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008256
8257 /*
8258 * We should definitely be at the ending "S? '>'" part
8259 */
8260 GROW;
8261 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008262 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008263 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264 } else
8265 NEXT1;
8266
8267 /*
8268 * [ WFC: Element Type Match ]
8269 * The Name in an element's end-tag must match the element type in the
8270 * start-tag.
8271 *
8272 */
8273 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008274 if (name == NULL) name = BAD_CAST "unparseable";
8275 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008277 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 }
8279
8280 /*
8281 * SAX: End of Tag
8282 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008283done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008284 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8285 (!ctxt->disableSAX))
8286 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8287
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 spacePop(ctxt);
8289 if (nsNr != 0)
8290 nsPop(ctxt, nsNr);
8291 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008292}
8293
8294/**
Owen Taylor3473f882001-02-23 17:55:21 +00008295 * xmlParseCDSect:
8296 * @ctxt: an XML parser context
8297 *
8298 * Parse escaped pure raw content.
8299 *
8300 * [18] CDSect ::= CDStart CData CDEnd
8301 *
8302 * [19] CDStart ::= '<![CDATA['
8303 *
8304 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8305 *
8306 * [21] CDEnd ::= ']]>'
8307 */
8308void
8309xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8310 xmlChar *buf = NULL;
8311 int len = 0;
8312 int size = XML_PARSER_BUFFER_SIZE;
8313 int r, rl;
8314 int s, sl;
8315 int cur, l;
8316 int count = 0;
8317
Daniel Veillard8f597c32003-10-06 08:19:27 +00008318 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008319 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008320 SKIP(9);
8321 } else
8322 return;
8323
8324 ctxt->instate = XML_PARSER_CDATA_SECTION;
8325 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008326 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008327 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008328 ctxt->instate = XML_PARSER_CONTENT;
8329 return;
8330 }
8331 NEXTL(rl);
8332 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008333 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008334 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008335 ctxt->instate = XML_PARSER_CONTENT;
8336 return;
8337 }
8338 NEXTL(sl);
8339 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008340 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008341 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008342 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008343 return;
8344 }
William M. Brack871611b2003-10-18 04:53:14 +00008345 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008346 ((r != ']') || (s != ']') || (cur != '>'))) {
8347 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008348 xmlChar *tmp;
8349
Owen Taylor3473f882001-02-23 17:55:21 +00008350 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008351 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8352 if (tmp == NULL) {
8353 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008354 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008355 return;
8356 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008357 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008358 }
8359 COPY_BUF(rl,buf,len,r);
8360 r = s;
8361 rl = sl;
8362 s = cur;
8363 sl = l;
8364 count++;
8365 if (count > 50) {
8366 GROW;
8367 count = 0;
8368 }
8369 NEXTL(l);
8370 cur = CUR_CHAR(l);
8371 }
8372 buf[len] = 0;
8373 ctxt->instate = XML_PARSER_CONTENT;
8374 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008375 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008376 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008377 xmlFree(buf);
8378 return;
8379 }
8380 NEXTL(l);
8381
8382 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008383 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008384 */
8385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8386 if (ctxt->sax->cdataBlock != NULL)
8387 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008388 else if (ctxt->sax->characters != NULL)
8389 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008390 }
8391 xmlFree(buf);
8392}
8393
8394/**
8395 * xmlParseContent:
8396 * @ctxt: an XML parser context
8397 *
8398 * Parse a content:
8399 *
8400 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8401 */
8402
8403void
8404xmlParseContent(xmlParserCtxtPtr ctxt) {
8405 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008406 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008407 ((RAW != '<') || (NXT(1) != '/')) &&
8408 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008409 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008410 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008411 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008412
8413 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008414 * First case : a Processing Instruction.
8415 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008416 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008417 xmlParsePI(ctxt);
8418 }
8419
8420 /*
8421 * Second case : a CDSection
8422 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008423 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008424 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008425 xmlParseCDSect(ctxt);
8426 }
8427
8428 /*
8429 * Third case : a comment
8430 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008431 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008432 (NXT(2) == '-') && (NXT(3) == '-')) {
8433 xmlParseComment(ctxt);
8434 ctxt->instate = XML_PARSER_CONTENT;
8435 }
8436
8437 /*
8438 * Fourth case : a sub-element.
8439 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008440 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008441 xmlParseElement(ctxt);
8442 }
8443
8444 /*
8445 * Fifth case : a reference. If if has not been resolved,
8446 * parsing returns it's Name, create the node
8447 */
8448
Daniel Veillard21a0f912001-02-25 19:54:14 +00008449 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008450 xmlParseReference(ctxt);
8451 }
8452
8453 /*
8454 * Last case, text. Note that References are handled directly.
8455 */
8456 else {
8457 xmlParseCharData(ctxt, 0);
8458 }
8459
8460 GROW;
8461 /*
8462 * Pop-up of finished entities.
8463 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008464 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008465 xmlPopInput(ctxt);
8466 SHRINK;
8467
Daniel Veillardfdc91562002-07-01 21:52:03 +00008468 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008469 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8470 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008471 ctxt->instate = XML_PARSER_EOF;
8472 break;
8473 }
8474 }
8475}
8476
8477/**
8478 * xmlParseElement:
8479 * @ctxt: an XML parser context
8480 *
8481 * parse an XML element, this is highly recursive
8482 *
8483 * [39] element ::= EmptyElemTag | STag content ETag
8484 *
8485 * [ WFC: Element Type Match ]
8486 * The Name in an element's end-tag must match the element type in the
8487 * start-tag.
8488 *
Owen Taylor3473f882001-02-23 17:55:21 +00008489 */
8490
8491void
8492xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008493 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494 const xmlChar *prefix;
8495 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008496 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008497 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008498 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008499 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008500
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008501 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8502 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8503 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8504 xmlParserMaxDepth);
8505 ctxt->instate = XML_PARSER_EOF;
8506 return;
8507 }
8508
Owen Taylor3473f882001-02-23 17:55:21 +00008509 /* Capture start position */
8510 if (ctxt->record_info) {
8511 node_info.begin_pos = ctxt->input->consumed +
8512 (CUR_PTR - ctxt->input->base);
8513 node_info.begin_line = ctxt->input->line;
8514 }
8515
8516 if (ctxt->spaceNr == 0)
8517 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008518 else if (*ctxt->space == -2)
8519 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008520 else
8521 spacePush(ctxt, *ctxt->space);
8522
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008523 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008524#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008525 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008526#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008527 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008528#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529 else
8530 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008531#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008532 if (name == NULL) {
8533 spacePop(ctxt);
8534 return;
8535 }
8536 namePush(ctxt, name);
8537 ret = ctxt->node;
8538
Daniel Veillard4432df22003-09-28 18:58:27 +00008539#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008540 /*
8541 * [ VC: Root Element Type ]
8542 * The Name in the document type declaration must match the element
8543 * type of the root element.
8544 */
8545 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8546 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8547 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008548#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008549
8550 /*
8551 * Check for an Empty Element.
8552 */
8553 if ((RAW == '/') && (NXT(1) == '>')) {
8554 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008555 if (ctxt->sax2) {
8556 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8557 (!ctxt->disableSAX))
8558 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008559#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008560 } else {
8561 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8562 (!ctxt->disableSAX))
8563 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008564#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008565 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 namePop(ctxt);
8567 spacePop(ctxt);
8568 if (nsNr != ctxt->nsNr)
8569 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008570 if ( ret != NULL && ctxt->record_info ) {
8571 node_info.end_pos = ctxt->input->consumed +
8572 (CUR_PTR - ctxt->input->base);
8573 node_info.end_line = ctxt->input->line;
8574 node_info.node = ret;
8575 xmlParserAddNodeInfo(ctxt, &node_info);
8576 }
8577 return;
8578 }
8579 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008580 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008581 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008582 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8583 "Couldn't find end of Start Tag %s line %d\n",
8584 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008585
8586 /*
8587 * end of parsing of this node.
8588 */
8589 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008590 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008591 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008592 if (nsNr != ctxt->nsNr)
8593 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008594
8595 /*
8596 * Capture end position and add node
8597 */
8598 if ( ret != NULL && ctxt->record_info ) {
8599 node_info.end_pos = ctxt->input->consumed +
8600 (CUR_PTR - ctxt->input->base);
8601 node_info.end_line = ctxt->input->line;
8602 node_info.node = ret;
8603 xmlParserAddNodeInfo(ctxt, &node_info);
8604 }
8605 return;
8606 }
8607
8608 /*
8609 * Parse the content of the element:
8610 */
8611 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008612 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008614 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008615 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008616
8617 /*
8618 * end of parsing of this node.
8619 */
8620 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008621 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008622 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008623 if (nsNr != ctxt->nsNr)
8624 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 return;
8626 }
8627
8628 /*
8629 * parse the end of tag: '</' should be here.
8630 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008631 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008632 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008633 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008634 }
8635#ifdef LIBXML_SAX1_ENABLED
8636 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008638#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008639
8640 /*
8641 * Capture end position and add node
8642 */
8643 if ( ret != NULL && ctxt->record_info ) {
8644 node_info.end_pos = ctxt->input->consumed +
8645 (CUR_PTR - ctxt->input->base);
8646 node_info.end_line = ctxt->input->line;
8647 node_info.node = ret;
8648 xmlParserAddNodeInfo(ctxt, &node_info);
8649 }
8650}
8651
8652/**
8653 * xmlParseVersionNum:
8654 * @ctxt: an XML parser context
8655 *
8656 * parse the XML version value.
8657 *
8658 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8659 *
8660 * Returns the string giving the XML version number, or NULL
8661 */
8662xmlChar *
8663xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8664 xmlChar *buf = NULL;
8665 int len = 0;
8666 int size = 10;
8667 xmlChar cur;
8668
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008669 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008670 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008671 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008672 return(NULL);
8673 }
8674 cur = CUR;
8675 while (((cur >= 'a') && (cur <= 'z')) ||
8676 ((cur >= 'A') && (cur <= 'Z')) ||
8677 ((cur >= '0') && (cur <= '9')) ||
8678 (cur == '_') || (cur == '.') ||
8679 (cur == ':') || (cur == '-')) {
8680 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008681 xmlChar *tmp;
8682
Owen Taylor3473f882001-02-23 17:55:21 +00008683 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008684 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8685 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008686 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008687 return(NULL);
8688 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008689 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008690 }
8691 buf[len++] = cur;
8692 NEXT;
8693 cur=CUR;
8694 }
8695 buf[len] = 0;
8696 return(buf);
8697}
8698
8699/**
8700 * xmlParseVersionInfo:
8701 * @ctxt: an XML parser context
8702 *
8703 * parse the XML version.
8704 *
8705 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8706 *
8707 * [25] Eq ::= S? '=' S?
8708 *
8709 * Returns the version string, e.g. "1.0"
8710 */
8711
8712xmlChar *
8713xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8714 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008715
Daniel Veillarda07050d2003-10-19 14:46:32 +00008716 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008717 SKIP(7);
8718 SKIP_BLANKS;
8719 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008720 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008721 return(NULL);
8722 }
8723 NEXT;
8724 SKIP_BLANKS;
8725 if (RAW == '"') {
8726 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008727 version = xmlParseVersionNum(ctxt);
8728 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008729 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008730 } else
8731 NEXT;
8732 } else if (RAW == '\''){
8733 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008734 version = xmlParseVersionNum(ctxt);
8735 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008736 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008737 } else
8738 NEXT;
8739 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008740 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008741 }
8742 }
8743 return(version);
8744}
8745
8746/**
8747 * xmlParseEncName:
8748 * @ctxt: an XML parser context
8749 *
8750 * parse the XML encoding name
8751 *
8752 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8753 *
8754 * Returns the encoding name value or NULL
8755 */
8756xmlChar *
8757xmlParseEncName(xmlParserCtxtPtr ctxt) {
8758 xmlChar *buf = NULL;
8759 int len = 0;
8760 int size = 10;
8761 xmlChar cur;
8762
8763 cur = CUR;
8764 if (((cur >= 'a') && (cur <= 'z')) ||
8765 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008766 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008767 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008768 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008769 return(NULL);
8770 }
8771
8772 buf[len++] = cur;
8773 NEXT;
8774 cur = CUR;
8775 while (((cur >= 'a') && (cur <= 'z')) ||
8776 ((cur >= 'A') && (cur <= 'Z')) ||
8777 ((cur >= '0') && (cur <= '9')) ||
8778 (cur == '.') || (cur == '_') ||
8779 (cur == '-')) {
8780 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008781 xmlChar *tmp;
8782
Owen Taylor3473f882001-02-23 17:55:21 +00008783 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008784 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8785 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008786 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008787 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008788 return(NULL);
8789 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008790 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008791 }
8792 buf[len++] = cur;
8793 NEXT;
8794 cur = CUR;
8795 if (cur == 0) {
8796 SHRINK;
8797 GROW;
8798 cur = CUR;
8799 }
8800 }
8801 buf[len] = 0;
8802 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008803 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008804 }
8805 return(buf);
8806}
8807
8808/**
8809 * xmlParseEncodingDecl:
8810 * @ctxt: an XML parser context
8811 *
8812 * parse the XML encoding declaration
8813 *
8814 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8815 *
8816 * this setups the conversion filters.
8817 *
8818 * Returns the encoding value or NULL
8819 */
8820
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008821const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008822xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8823 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008824
8825 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008826 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008827 SKIP(8);
8828 SKIP_BLANKS;
8829 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008830 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008831 return(NULL);
8832 }
8833 NEXT;
8834 SKIP_BLANKS;
8835 if (RAW == '"') {
8836 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008837 encoding = xmlParseEncName(ctxt);
8838 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008839 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008840 } else
8841 NEXT;
8842 } else if (RAW == '\''){
8843 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008844 encoding = xmlParseEncName(ctxt);
8845 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008846 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008847 } else
8848 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008849 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008850 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008851 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008852 /*
8853 * UTF-16 encoding stwich has already taken place at this stage,
8854 * more over the little-endian/big-endian selection is already done
8855 */
8856 if ((encoding != NULL) &&
8857 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8858 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008859 if (ctxt->encoding != NULL)
8860 xmlFree((xmlChar *) ctxt->encoding);
8861 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008862 }
8863 /*
8864 * UTF-8 encoding is handled natively
8865 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008866 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008867 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8868 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008869 if (ctxt->encoding != NULL)
8870 xmlFree((xmlChar *) ctxt->encoding);
8871 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008872 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008873 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008874 xmlCharEncodingHandlerPtr handler;
8875
8876 if (ctxt->input->encoding != NULL)
8877 xmlFree((xmlChar *) ctxt->input->encoding);
8878 ctxt->input->encoding = encoding;
8879
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008880 handler = xmlFindCharEncodingHandler((const char *) encoding);
8881 if (handler != NULL) {
8882 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008883 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008884 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008885 "Unsupported encoding %s\n", encoding);
8886 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008887 }
8888 }
8889 }
8890 return(encoding);
8891}
8892
8893/**
8894 * xmlParseSDDecl:
8895 * @ctxt: an XML parser context
8896 *
8897 * parse the XML standalone declaration
8898 *
8899 * [32] SDDecl ::= S 'standalone' Eq
8900 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8901 *
8902 * [ VC: Standalone Document Declaration ]
8903 * TODO The standalone document declaration must have the value "no"
8904 * if any external markup declarations contain declarations of:
8905 * - attributes with default values, if elements to which these
8906 * attributes apply appear in the document without specifications
8907 * of values for these attributes, or
8908 * - entities (other than amp, lt, gt, apos, quot), if references
8909 * to those entities appear in the document, or
8910 * - attributes with values subject to normalization, where the
8911 * attribute appears in the document with a value which will change
8912 * as a result of normalization, or
8913 * - element types with element content, if white space occurs directly
8914 * within any instance of those types.
8915 *
8916 * Returns 1 if standalone, 0 otherwise
8917 */
8918
8919int
8920xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8921 int standalone = -1;
8922
8923 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008924 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008925 SKIP(10);
8926 SKIP_BLANKS;
8927 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008928 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008929 return(standalone);
8930 }
8931 NEXT;
8932 SKIP_BLANKS;
8933 if (RAW == '\''){
8934 NEXT;
8935 if ((RAW == 'n') && (NXT(1) == 'o')) {
8936 standalone = 0;
8937 SKIP(2);
8938 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8939 (NXT(2) == 's')) {
8940 standalone = 1;
8941 SKIP(3);
8942 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008943 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008944 }
8945 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008946 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008947 } else
8948 NEXT;
8949 } else if (RAW == '"'){
8950 NEXT;
8951 if ((RAW == 'n') && (NXT(1) == 'o')) {
8952 standalone = 0;
8953 SKIP(2);
8954 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8955 (NXT(2) == 's')) {
8956 standalone = 1;
8957 SKIP(3);
8958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008959 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008960 }
8961 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008962 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008963 } else
8964 NEXT;
8965 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 }
8968 }
8969 return(standalone);
8970}
8971
8972/**
8973 * xmlParseXMLDecl:
8974 * @ctxt: an XML parser context
8975 *
8976 * parse an XML declaration header
8977 *
8978 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8979 */
8980
8981void
8982xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8983 xmlChar *version;
8984
8985 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008986 * This value for standalone indicates that the document has an
8987 * XML declaration but it does not have a standalone attribute.
8988 * It will be overwritten later if a standalone attribute is found.
8989 */
8990 ctxt->input->standalone = -2;
8991
8992 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008993 * We know that '<?xml' is here.
8994 */
8995 SKIP(5);
8996
William M. Brack76e95df2003-10-18 16:20:14 +00008997 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8999 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009000 }
9001 SKIP_BLANKS;
9002
9003 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009004 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009005 */
9006 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009007 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009008 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009009 } else {
9010 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9011 /*
9012 * TODO: Blueberry should be detected here
9013 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009014 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9015 "Unsupported version '%s'\n",
9016 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009017 }
9018 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009019 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009020 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009021 }
Owen Taylor3473f882001-02-23 17:55:21 +00009022
9023 /*
9024 * We may have the encoding declaration
9025 */
William M. Brack76e95df2003-10-18 16:20:14 +00009026 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009027 if ((RAW == '?') && (NXT(1) == '>')) {
9028 SKIP(2);
9029 return;
9030 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009031 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009032 }
9033 xmlParseEncodingDecl(ctxt);
9034 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9035 /*
9036 * The XML REC instructs us to stop parsing right here
9037 */
9038 return;
9039 }
9040
9041 /*
9042 * We may have the standalone status.
9043 */
William M. Brack76e95df2003-10-18 16:20:14 +00009044 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009045 if ((RAW == '?') && (NXT(1) == '>')) {
9046 SKIP(2);
9047 return;
9048 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009049 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009050 }
9051 SKIP_BLANKS;
9052 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9053
9054 SKIP_BLANKS;
9055 if ((RAW == '?') && (NXT(1) == '>')) {
9056 SKIP(2);
9057 } else if (RAW == '>') {
9058 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009059 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009060 NEXT;
9061 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009062 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009063 MOVETO_ENDTAG(CUR_PTR);
9064 NEXT;
9065 }
9066}
9067
9068/**
9069 * xmlParseMisc:
9070 * @ctxt: an XML parser context
9071 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009072 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009073 *
9074 * [27] Misc ::= Comment | PI | S
9075 */
9076
9077void
9078xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009079 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009080 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009081 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009082 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009083 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009084 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009085 NEXT;
9086 } else
9087 xmlParseComment(ctxt);
9088 }
9089}
9090
9091/**
9092 * xmlParseDocument:
9093 * @ctxt: an XML parser context
9094 *
9095 * parse an XML document (and build a tree if using the standard SAX
9096 * interface).
9097 *
9098 * [1] document ::= prolog element Misc*
9099 *
9100 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9101 *
9102 * Returns 0, -1 in case of error. the parser context is augmented
9103 * as a result of the parsing.
9104 */
9105
9106int
9107xmlParseDocument(xmlParserCtxtPtr ctxt) {
9108 xmlChar start[4];
9109 xmlCharEncoding enc;
9110
9111 xmlInitParser();
9112
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009113 if ((ctxt == NULL) || (ctxt->input == NULL))
9114 return(-1);
9115
Owen Taylor3473f882001-02-23 17:55:21 +00009116 GROW;
9117
9118 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009119 * SAX: detecting the level.
9120 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009121 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122
9123 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009124 * SAX: beginning of the document processing.
9125 */
9126 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9127 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9128
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009129 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9130 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009131 /*
9132 * Get the 4 first bytes and decode the charset
9133 * if enc != XML_CHAR_ENCODING_NONE
9134 * plug some encoding conversion routines.
9135 */
9136 start[0] = RAW;
9137 start[1] = NXT(1);
9138 start[2] = NXT(2);
9139 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009140 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009141 if (enc != XML_CHAR_ENCODING_NONE) {
9142 xmlSwitchEncoding(ctxt, enc);
9143 }
Owen Taylor3473f882001-02-23 17:55:21 +00009144 }
9145
9146
9147 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009148 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009149 }
9150
9151 /*
9152 * Check for the XMLDecl in the Prolog.
9153 */
9154 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009155 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009156
9157 /*
9158 * Note that we will switch encoding on the fly.
9159 */
9160 xmlParseXMLDecl(ctxt);
9161 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9162 /*
9163 * The XML REC instructs us to stop parsing right here
9164 */
9165 return(-1);
9166 }
9167 ctxt->standalone = ctxt->input->standalone;
9168 SKIP_BLANKS;
9169 } else {
9170 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9171 }
9172 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9173 ctxt->sax->startDocument(ctxt->userData);
9174
9175 /*
9176 * The Misc part of the Prolog
9177 */
9178 GROW;
9179 xmlParseMisc(ctxt);
9180
9181 /*
9182 * Then possibly doc type declaration(s) and more Misc
9183 * (doctypedecl Misc*)?
9184 */
9185 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009186 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009187
9188 ctxt->inSubset = 1;
9189 xmlParseDocTypeDecl(ctxt);
9190 if (RAW == '[') {
9191 ctxt->instate = XML_PARSER_DTD;
9192 xmlParseInternalSubset(ctxt);
9193 }
9194
9195 /*
9196 * Create and update the external subset.
9197 */
9198 ctxt->inSubset = 2;
9199 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9200 (!ctxt->disableSAX))
9201 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9202 ctxt->extSubSystem, ctxt->extSubURI);
9203 ctxt->inSubset = 0;
9204
9205
9206 ctxt->instate = XML_PARSER_PROLOG;
9207 xmlParseMisc(ctxt);
9208 }
9209
9210 /*
9211 * Time to start parsing the tree itself
9212 */
9213 GROW;
9214 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009215 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9216 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009217 } else {
9218 ctxt->instate = XML_PARSER_CONTENT;
9219 xmlParseElement(ctxt);
9220 ctxt->instate = XML_PARSER_EPILOG;
9221
9222
9223 /*
9224 * The Misc part at the end
9225 */
9226 xmlParseMisc(ctxt);
9227
Daniel Veillard561b7f82002-03-20 21:55:57 +00009228 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009230 }
9231 ctxt->instate = XML_PARSER_EOF;
9232 }
9233
9234 /*
9235 * SAX: end of the document processing.
9236 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009237 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009238 ctxt->sax->endDocument(ctxt->userData);
9239
Daniel Veillard5997aca2002-03-18 18:36:20 +00009240 /*
9241 * Remove locally kept entity definitions if the tree was not built
9242 */
9243 if ((ctxt->myDoc != NULL) &&
9244 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9245 xmlFreeDoc(ctxt->myDoc);
9246 ctxt->myDoc = NULL;
9247 }
9248
Daniel Veillardc7612992002-02-17 22:47:37 +00009249 if (! ctxt->wellFormed) {
9250 ctxt->valid = 0;
9251 return(-1);
9252 }
Owen Taylor3473f882001-02-23 17:55:21 +00009253 return(0);
9254}
9255
9256/**
9257 * xmlParseExtParsedEnt:
9258 * @ctxt: an XML parser context
9259 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009260 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009261 * An external general parsed entity is well-formed if it matches the
9262 * production labeled extParsedEnt.
9263 *
9264 * [78] extParsedEnt ::= TextDecl? content
9265 *
9266 * Returns 0, -1 in case of error. the parser context is augmented
9267 * as a result of the parsing.
9268 */
9269
9270int
9271xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9272 xmlChar start[4];
9273 xmlCharEncoding enc;
9274
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009275 if ((ctxt == NULL) || (ctxt->input == NULL))
9276 return(-1);
9277
Owen Taylor3473f882001-02-23 17:55:21 +00009278 xmlDefaultSAXHandlerInit();
9279
Daniel Veillard309f81d2003-09-23 09:02:53 +00009280 xmlDetectSAX2(ctxt);
9281
Owen Taylor3473f882001-02-23 17:55:21 +00009282 GROW;
9283
9284 /*
9285 * SAX: beginning of the document processing.
9286 */
9287 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9288 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9289
9290 /*
9291 * Get the 4 first bytes and decode the charset
9292 * if enc != XML_CHAR_ENCODING_NONE
9293 * plug some encoding conversion routines.
9294 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009295 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9296 start[0] = RAW;
9297 start[1] = NXT(1);
9298 start[2] = NXT(2);
9299 start[3] = NXT(3);
9300 enc = xmlDetectCharEncoding(start, 4);
9301 if (enc != XML_CHAR_ENCODING_NONE) {
9302 xmlSwitchEncoding(ctxt, enc);
9303 }
Owen Taylor3473f882001-02-23 17:55:21 +00009304 }
9305
9306
9307 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009308 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009309 }
9310
9311 /*
9312 * Check for the XMLDecl in the Prolog.
9313 */
9314 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009315 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009316
9317 /*
9318 * Note that we will switch encoding on the fly.
9319 */
9320 xmlParseXMLDecl(ctxt);
9321 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9322 /*
9323 * The XML REC instructs us to stop parsing right here
9324 */
9325 return(-1);
9326 }
9327 SKIP_BLANKS;
9328 } else {
9329 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9330 }
9331 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9332 ctxt->sax->startDocument(ctxt->userData);
9333
9334 /*
9335 * Doing validity checking on chunk doesn't make sense
9336 */
9337 ctxt->instate = XML_PARSER_CONTENT;
9338 ctxt->validate = 0;
9339 ctxt->loadsubset = 0;
9340 ctxt->depth = 0;
9341
9342 xmlParseContent(ctxt);
9343
9344 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009345 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009346 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009347 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009348 }
9349
9350 /*
9351 * SAX: end of the document processing.
9352 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009353 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009354 ctxt->sax->endDocument(ctxt->userData);
9355
9356 if (! ctxt->wellFormed) return(-1);
9357 return(0);
9358}
9359
Daniel Veillard73b013f2003-09-30 12:36:01 +00009360#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009361/************************************************************************
9362 * *
9363 * Progressive parsing interfaces *
9364 * *
9365 ************************************************************************/
9366
9367/**
9368 * xmlParseLookupSequence:
9369 * @ctxt: an XML parser context
9370 * @first: the first char to lookup
9371 * @next: the next char to lookup or zero
9372 * @third: the next char to lookup or zero
9373 *
9374 * Try to find if a sequence (first, next, third) or just (first next) or
9375 * (first) is available in the input stream.
9376 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9377 * to avoid rescanning sequences of bytes, it DOES change the state of the
9378 * parser, do not use liberally.
9379 *
9380 * Returns the index to the current parsing point if the full sequence
9381 * is available, -1 otherwise.
9382 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009383static int
Owen Taylor3473f882001-02-23 17:55:21 +00009384xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9385 xmlChar next, xmlChar third) {
9386 int base, len;
9387 xmlParserInputPtr in;
9388 const xmlChar *buf;
9389
9390 in = ctxt->input;
9391 if (in == NULL) return(-1);
9392 base = in->cur - in->base;
9393 if (base < 0) return(-1);
9394 if (ctxt->checkIndex > base)
9395 base = ctxt->checkIndex;
9396 if (in->buf == NULL) {
9397 buf = in->base;
9398 len = in->length;
9399 } else {
9400 buf = in->buf->buffer->content;
9401 len = in->buf->buffer->use;
9402 }
9403 /* take into account the sequence length */
9404 if (third) len -= 2;
9405 else if (next) len --;
9406 for (;base < len;base++) {
9407 if (buf[base] == first) {
9408 if (third != 0) {
9409 if ((buf[base + 1] != next) ||
9410 (buf[base + 2] != third)) continue;
9411 } else if (next != 0) {
9412 if (buf[base + 1] != next) continue;
9413 }
9414 ctxt->checkIndex = 0;
9415#ifdef DEBUG_PUSH
9416 if (next == 0)
9417 xmlGenericError(xmlGenericErrorContext,
9418 "PP: lookup '%c' found at %d\n",
9419 first, base);
9420 else if (third == 0)
9421 xmlGenericError(xmlGenericErrorContext,
9422 "PP: lookup '%c%c' found at %d\n",
9423 first, next, base);
9424 else
9425 xmlGenericError(xmlGenericErrorContext,
9426 "PP: lookup '%c%c%c' found at %d\n",
9427 first, next, third, base);
9428#endif
9429 return(base - (in->cur - in->base));
9430 }
9431 }
9432 ctxt->checkIndex = base;
9433#ifdef DEBUG_PUSH
9434 if (next == 0)
9435 xmlGenericError(xmlGenericErrorContext,
9436 "PP: lookup '%c' failed\n", first);
9437 else if (third == 0)
9438 xmlGenericError(xmlGenericErrorContext,
9439 "PP: lookup '%c%c' failed\n", first, next);
9440 else
9441 xmlGenericError(xmlGenericErrorContext,
9442 "PP: lookup '%c%c%c' failed\n", first, next, third);
9443#endif
9444 return(-1);
9445}
9446
9447/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 * xmlParseGetLasts:
9449 * @ctxt: an XML parser context
9450 * @lastlt: pointer to store the last '<' from the input
9451 * @lastgt: pointer to store the last '>' from the input
9452 *
9453 * Lookup the last < and > in the current chunk
9454 */
9455static void
9456xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9457 const xmlChar **lastgt) {
9458 const xmlChar *tmp;
9459
9460 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9461 xmlGenericError(xmlGenericErrorContext,
9462 "Internal error: xmlParseGetLasts\n");
9463 return;
9464 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009465 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 tmp = ctxt->input->end;
9467 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009468 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009469 if (tmp < ctxt->input->base) {
9470 *lastlt = NULL;
9471 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009473 *lastlt = tmp;
9474 tmp++;
9475 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9476 if (*tmp == '\'') {
9477 tmp++;
9478 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9479 if (tmp < ctxt->input->end) tmp++;
9480 } else if (*tmp == '"') {
9481 tmp++;
9482 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9483 if (tmp < ctxt->input->end) tmp++;
9484 } else
9485 tmp++;
9486 }
9487 if (tmp < ctxt->input->end)
9488 *lastgt = tmp;
9489 else {
9490 tmp = *lastlt;
9491 tmp--;
9492 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9493 if (tmp >= ctxt->input->base)
9494 *lastgt = tmp;
9495 else
9496 *lastgt = NULL;
9497 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009498 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 } else {
9500 *lastlt = NULL;
9501 *lastgt = NULL;
9502 }
9503}
9504/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009505 * xmlCheckCdataPush:
9506 * @cur: pointer to the bock of characters
9507 * @len: length of the block in bytes
9508 *
9509 * Check that the block of characters is okay as SCdata content [20]
9510 *
9511 * Returns the number of bytes to pass if okay, a negative index where an
9512 * UTF-8 error occured otherwise
9513 */
9514static int
9515xmlCheckCdataPush(const xmlChar *utf, int len) {
9516 int ix;
9517 unsigned char c;
9518 int codepoint;
9519
9520 if ((utf == NULL) || (len <= 0))
9521 return(0);
9522
9523 for (ix = 0; ix < len;) { /* string is 0-terminated */
9524 c = utf[ix];
9525 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9526 if (c >= 0x20)
9527 ix++;
9528 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9529 ix++;
9530 else
9531 return(-ix);
9532 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9533 if (ix + 2 > len) return(ix);
9534 if ((utf[ix+1] & 0xc0 ) != 0x80)
9535 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009536 codepoint = (utf[ix] & 0x1f) << 6;
9537 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009538 if (!xmlIsCharQ(codepoint))
9539 return(-ix);
9540 ix += 2;
9541 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9542 if (ix + 3 > len) return(ix);
9543 if (((utf[ix+1] & 0xc0) != 0x80) ||
9544 ((utf[ix+2] & 0xc0) != 0x80))
9545 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009546 codepoint = (utf[ix] & 0xf) << 12;
9547 codepoint |= (utf[ix+1] & 0x3f) << 6;
9548 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009549 if (!xmlIsCharQ(codepoint))
9550 return(-ix);
9551 ix += 3;
9552 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9553 if (ix + 4 > len) return(ix);
9554 if (((utf[ix+1] & 0xc0) != 0x80) ||
9555 ((utf[ix+2] & 0xc0) != 0x80) ||
9556 ((utf[ix+3] & 0xc0) != 0x80))
9557 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009558 codepoint = (utf[ix] & 0x7) << 18;
9559 codepoint |= (utf[ix+1] & 0x3f) << 12;
9560 codepoint |= (utf[ix+2] & 0x3f) << 6;
9561 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009562 if (!xmlIsCharQ(codepoint))
9563 return(-ix);
9564 ix += 4;
9565 } else /* unknown encoding */
9566 return(-ix);
9567 }
9568 return(ix);
9569}
9570
9571/**
Owen Taylor3473f882001-02-23 17:55:21 +00009572 * xmlParseTryOrFinish:
9573 * @ctxt: an XML parser context
9574 * @terminate: last chunk indicator
9575 *
9576 * Try to progress on parsing
9577 *
9578 * Returns zero if no parsing was possible
9579 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009580static int
Owen Taylor3473f882001-02-23 17:55:21 +00009581xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9582 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009583 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009584 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009585 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009586
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009587 if (ctxt->input == NULL)
9588 return(0);
9589
Owen Taylor3473f882001-02-23 17:55:21 +00009590#ifdef DEBUG_PUSH
9591 switch (ctxt->instate) {
9592 case XML_PARSER_EOF:
9593 xmlGenericError(xmlGenericErrorContext,
9594 "PP: try EOF\n"); break;
9595 case XML_PARSER_START:
9596 xmlGenericError(xmlGenericErrorContext,
9597 "PP: try START\n"); break;
9598 case XML_PARSER_MISC:
9599 xmlGenericError(xmlGenericErrorContext,
9600 "PP: try MISC\n");break;
9601 case XML_PARSER_COMMENT:
9602 xmlGenericError(xmlGenericErrorContext,
9603 "PP: try COMMENT\n");break;
9604 case XML_PARSER_PROLOG:
9605 xmlGenericError(xmlGenericErrorContext,
9606 "PP: try PROLOG\n");break;
9607 case XML_PARSER_START_TAG:
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: try START_TAG\n");break;
9610 case XML_PARSER_CONTENT:
9611 xmlGenericError(xmlGenericErrorContext,
9612 "PP: try CONTENT\n");break;
9613 case XML_PARSER_CDATA_SECTION:
9614 xmlGenericError(xmlGenericErrorContext,
9615 "PP: try CDATA_SECTION\n");break;
9616 case XML_PARSER_END_TAG:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: try END_TAG\n");break;
9619 case XML_PARSER_ENTITY_DECL:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: try ENTITY_DECL\n");break;
9622 case XML_PARSER_ENTITY_VALUE:
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: try ENTITY_VALUE\n");break;
9625 case XML_PARSER_ATTRIBUTE_VALUE:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: try ATTRIBUTE_VALUE\n");break;
9628 case XML_PARSER_DTD:
9629 xmlGenericError(xmlGenericErrorContext,
9630 "PP: try DTD\n");break;
9631 case XML_PARSER_EPILOG:
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: try EPILOG\n");break;
9634 case XML_PARSER_PI:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: try PI\n");break;
9637 case XML_PARSER_IGNORE:
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: try IGNORE\n");break;
9640 }
9641#endif
9642
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009643 if ((ctxt->input != NULL) &&
9644 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009645 xmlSHRINK(ctxt);
9646 ctxt->checkIndex = 0;
9647 }
9648 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009649
Daniel Veillarda880b122003-04-21 21:36:41 +00009650 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009651 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009652 return(0);
9653
9654
Owen Taylor3473f882001-02-23 17:55:21 +00009655 /*
9656 * Pop-up of finished entities.
9657 */
9658 while ((RAW == 0) && (ctxt->inputNr > 1))
9659 xmlPopInput(ctxt);
9660
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009661 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009662 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009663 avail = ctxt->input->length -
9664 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009665 else {
9666 /*
9667 * If we are operating on converted input, try to flush
9668 * remaining chars to avoid them stalling in the non-converted
9669 * buffer.
9670 */
9671 if ((ctxt->input->buf->raw != NULL) &&
9672 (ctxt->input->buf->raw->use > 0)) {
9673 int base = ctxt->input->base -
9674 ctxt->input->buf->buffer->content;
9675 int current = ctxt->input->cur - ctxt->input->base;
9676
9677 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9678 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9679 ctxt->input->cur = ctxt->input->base + current;
9680 ctxt->input->end =
9681 &ctxt->input->buf->buffer->content[
9682 ctxt->input->buf->buffer->use];
9683 }
9684 avail = ctxt->input->buf->buffer->use -
9685 (ctxt->input->cur - ctxt->input->base);
9686 }
Owen Taylor3473f882001-02-23 17:55:21 +00009687 if (avail < 1)
9688 goto done;
9689 switch (ctxt->instate) {
9690 case XML_PARSER_EOF:
9691 /*
9692 * Document parsing is done !
9693 */
9694 goto done;
9695 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009696 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9697 xmlChar start[4];
9698 xmlCharEncoding enc;
9699
9700 /*
9701 * Very first chars read from the document flow.
9702 */
9703 if (avail < 4)
9704 goto done;
9705
9706 /*
9707 * Get the 4 first bytes and decode the charset
9708 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009709 * plug some encoding conversion routines,
9710 * else xmlSwitchEncoding will set to (default)
9711 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009712 */
9713 start[0] = RAW;
9714 start[1] = NXT(1);
9715 start[2] = NXT(2);
9716 start[3] = NXT(3);
9717 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009718 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009719 break;
9720 }
Owen Taylor3473f882001-02-23 17:55:21 +00009721
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009722 if (avail < 2)
9723 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009724 cur = ctxt->input->cur[0];
9725 next = ctxt->input->cur[1];
9726 if (cur == 0) {
9727 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9728 ctxt->sax->setDocumentLocator(ctxt->userData,
9729 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009730 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009731 ctxt->instate = XML_PARSER_EOF;
9732#ifdef DEBUG_PUSH
9733 xmlGenericError(xmlGenericErrorContext,
9734 "PP: entering EOF\n");
9735#endif
9736 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9737 ctxt->sax->endDocument(ctxt->userData);
9738 goto done;
9739 }
9740 if ((cur == '<') && (next == '?')) {
9741 /* PI or XML decl */
9742 if (avail < 5) return(ret);
9743 if ((!terminate) &&
9744 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9745 return(ret);
9746 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9747 ctxt->sax->setDocumentLocator(ctxt->userData,
9748 &xmlDefaultSAXLocator);
9749 if ((ctxt->input->cur[2] == 'x') &&
9750 (ctxt->input->cur[3] == 'm') &&
9751 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009752 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009753 ret += 5;
9754#ifdef DEBUG_PUSH
9755 xmlGenericError(xmlGenericErrorContext,
9756 "PP: Parsing XML Decl\n");
9757#endif
9758 xmlParseXMLDecl(ctxt);
9759 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9760 /*
9761 * The XML REC instructs us to stop parsing right
9762 * here
9763 */
9764 ctxt->instate = XML_PARSER_EOF;
9765 return(0);
9766 }
9767 ctxt->standalone = ctxt->input->standalone;
9768 if ((ctxt->encoding == NULL) &&
9769 (ctxt->input->encoding != NULL))
9770 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9771 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9772 (!ctxt->disableSAX))
9773 ctxt->sax->startDocument(ctxt->userData);
9774 ctxt->instate = XML_PARSER_MISC;
9775#ifdef DEBUG_PUSH
9776 xmlGenericError(xmlGenericErrorContext,
9777 "PP: entering MISC\n");
9778#endif
9779 } else {
9780 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9781 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9782 (!ctxt->disableSAX))
9783 ctxt->sax->startDocument(ctxt->userData);
9784 ctxt->instate = XML_PARSER_MISC;
9785#ifdef DEBUG_PUSH
9786 xmlGenericError(xmlGenericErrorContext,
9787 "PP: entering MISC\n");
9788#endif
9789 }
9790 } else {
9791 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9792 ctxt->sax->setDocumentLocator(ctxt->userData,
9793 &xmlDefaultSAXLocator);
9794 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009795 if (ctxt->version == NULL) {
9796 xmlErrMemory(ctxt, NULL);
9797 break;
9798 }
Owen Taylor3473f882001-02-23 17:55:21 +00009799 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9800 (!ctxt->disableSAX))
9801 ctxt->sax->startDocument(ctxt->userData);
9802 ctxt->instate = XML_PARSER_MISC;
9803#ifdef DEBUG_PUSH
9804 xmlGenericError(xmlGenericErrorContext,
9805 "PP: entering MISC\n");
9806#endif
9807 }
9808 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009810 const xmlChar *name;
9811 const xmlChar *prefix;
9812 const xmlChar *URI;
9813 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009814
9815 if ((avail < 2) && (ctxt->inputNr == 1))
9816 goto done;
9817 cur = ctxt->input->cur[0];
9818 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009819 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009820 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009821 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9822 ctxt->sax->endDocument(ctxt->userData);
9823 goto done;
9824 }
9825 if (!terminate) {
9826 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009827 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009828 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009829 goto done;
9830 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9831 goto done;
9832 }
9833 }
9834 if (ctxt->spaceNr == 0)
9835 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009836 else if (*ctxt->space == -2)
9837 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 else
9839 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009841 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009842#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009843 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009844#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009845 else
9846 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009847#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009848 if (name == NULL) {
9849 spacePop(ctxt);
9850 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009851 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9852 ctxt->sax->endDocument(ctxt->userData);
9853 goto done;
9854 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009855#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009856 /*
9857 * [ VC: Root Element Type ]
9858 * The Name in the document type declaration must match
9859 * the element type of the root element.
9860 */
9861 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9862 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9863 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009864#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009865
9866 /*
9867 * Check for an Empty Element.
9868 */
9869 if ((RAW == '/') && (NXT(1) == '>')) {
9870 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009871
9872 if (ctxt->sax2) {
9873 if ((ctxt->sax != NULL) &&
9874 (ctxt->sax->endElementNs != NULL) &&
9875 (!ctxt->disableSAX))
9876 ctxt->sax->endElementNs(ctxt->userData, name,
9877 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009878 if (ctxt->nsNr - nsNr > 0)
9879 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009880#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009881 } else {
9882 if ((ctxt->sax != NULL) &&
9883 (ctxt->sax->endElement != NULL) &&
9884 (!ctxt->disableSAX))
9885 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009886#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009888 spacePop(ctxt);
9889 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009890 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009891 } else {
9892 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009893 }
9894 break;
9895 }
9896 if (RAW == '>') {
9897 NEXT;
9898 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009899 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009900 "Couldn't find end of Start Tag %s\n",
9901 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009902 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009903 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009905 if (ctxt->sax2)
9906 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009907#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009908 else
9909 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009910#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009911
Daniel Veillarda880b122003-04-21 21:36:41 +00009912 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009913 break;
9914 }
9915 case XML_PARSER_CONTENT: {
9916 const xmlChar *test;
9917 unsigned int cons;
9918 if ((avail < 2) && (ctxt->inputNr == 1))
9919 goto done;
9920 cur = ctxt->input->cur[0];
9921 next = ctxt->input->cur[1];
9922
9923 test = CUR_PTR;
9924 cons = ctxt->input->consumed;
9925 if ((cur == '<') && (next == '/')) {
9926 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009927 break;
9928 } else if ((cur == '<') && (next == '?')) {
9929 if ((!terminate) &&
9930 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9931 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009932 xmlParsePI(ctxt);
9933 } else if ((cur == '<') && (next != '!')) {
9934 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009935 break;
9936 } else if ((cur == '<') && (next == '!') &&
9937 (ctxt->input->cur[2] == '-') &&
9938 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009939 int term;
9940
9941 if (avail < 4)
9942 goto done;
9943 ctxt->input->cur += 4;
9944 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9945 ctxt->input->cur -= 4;
9946 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009947 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009948 xmlParseComment(ctxt);
9949 ctxt->instate = XML_PARSER_CONTENT;
9950 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9951 (ctxt->input->cur[2] == '[') &&
9952 (ctxt->input->cur[3] == 'C') &&
9953 (ctxt->input->cur[4] == 'D') &&
9954 (ctxt->input->cur[5] == 'A') &&
9955 (ctxt->input->cur[6] == 'T') &&
9956 (ctxt->input->cur[7] == 'A') &&
9957 (ctxt->input->cur[8] == '[')) {
9958 SKIP(9);
9959 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009960 break;
9961 } else if ((cur == '<') && (next == '!') &&
9962 (avail < 9)) {
9963 goto done;
9964 } else if (cur == '&') {
9965 if ((!terminate) &&
9966 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9967 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009968 xmlParseReference(ctxt);
9969 } else {
9970 /* TODO Avoid the extra copy, handle directly !!! */
9971 /*
9972 * Goal of the following test is:
9973 * - minimize calls to the SAX 'character' callback
9974 * when they are mergeable
9975 * - handle a problem for isBlank when we only parse
9976 * a sequence of blank chars and the next one is
9977 * not available to check against '<' presence.
9978 * - tries to homogenize the differences in SAX
9979 * callbacks between the push and pull versions
9980 * of the parser.
9981 */
9982 if ((ctxt->inputNr == 1) &&
9983 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9984 if (!terminate) {
9985 if (ctxt->progressive) {
9986 if ((lastlt == NULL) ||
9987 (ctxt->input->cur > lastlt))
9988 goto done;
9989 } else if (xmlParseLookupSequence(ctxt,
9990 '<', 0, 0) < 0) {
9991 goto done;
9992 }
9993 }
9994 }
9995 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009996 xmlParseCharData(ctxt, 0);
9997 }
9998 /*
9999 * Pop-up of finished entities.
10000 */
10001 while ((RAW == 0) && (ctxt->inputNr > 1))
10002 xmlPopInput(ctxt);
10003 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010004 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10005 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010006 ctxt->instate = XML_PARSER_EOF;
10007 break;
10008 }
10009 break;
10010 }
10011 case XML_PARSER_END_TAG:
10012 if (avail < 2)
10013 goto done;
10014 if (!terminate) {
10015 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010016 /* > can be found unescaped in attribute values */
10017 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010018 goto done;
10019 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10020 goto done;
10021 }
10022 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010023 if (ctxt->sax2) {
10024 xmlParseEndTag2(ctxt,
10025 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10026 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010027 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010028 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010029 }
10030#ifdef LIBXML_SAX1_ENABLED
10031 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010032 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010033#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010034 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010035 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010036 } else {
10037 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010038 }
10039 break;
10040 case XML_PARSER_CDATA_SECTION: {
10041 /*
10042 * Push mode needs the SAX cdataBlock callback to
10043 * merge back contiguous callbacks.
10044 */
10045 int base;
10046
10047 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10048 if (base < 0) {
10049 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010050 int tmp;
10051
10052 tmp = xmlCheckCdataPush(ctxt->input->cur,
10053 XML_PARSER_BIG_BUFFER_SIZE);
10054 if (tmp < 0) {
10055 tmp = -tmp;
10056 ctxt->input->cur += tmp;
10057 goto encoding_error;
10058 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010059 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10060 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010061 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010062 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010063 else if (ctxt->sax->characters != NULL)
10064 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010065 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010066 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010067 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010068 ctxt->checkIndex = 0;
10069 }
10070 goto done;
10071 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010072 int tmp;
10073
10074 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10075 if ((tmp < 0) || (tmp != base)) {
10076 tmp = -tmp;
10077 ctxt->input->cur += tmp;
10078 goto encoding_error;
10079 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010080 if ((ctxt->sax != NULL) && (base > 0) &&
10081 (!ctxt->disableSAX)) {
10082 if (ctxt->sax->cdataBlock != NULL)
10083 ctxt->sax->cdataBlock(ctxt->userData,
10084 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010085 else if (ctxt->sax->characters != NULL)
10086 ctxt->sax->characters(ctxt->userData,
10087 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010088 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010089 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010090 ctxt->checkIndex = 0;
10091 ctxt->instate = XML_PARSER_CONTENT;
10092#ifdef DEBUG_PUSH
10093 xmlGenericError(xmlGenericErrorContext,
10094 "PP: entering CONTENT\n");
10095#endif
10096 }
10097 break;
10098 }
Owen Taylor3473f882001-02-23 17:55:21 +000010099 case XML_PARSER_MISC:
10100 SKIP_BLANKS;
10101 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010102 avail = ctxt->input->length -
10103 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010104 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010105 avail = ctxt->input->buf->buffer->use -
10106 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010107 if (avail < 2)
10108 goto done;
10109 cur = ctxt->input->cur[0];
10110 next = ctxt->input->cur[1];
10111 if ((cur == '<') && (next == '?')) {
10112 if ((!terminate) &&
10113 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10114 goto done;
10115#ifdef DEBUG_PUSH
10116 xmlGenericError(xmlGenericErrorContext,
10117 "PP: Parsing PI\n");
10118#endif
10119 xmlParsePI(ctxt);
10120 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010121 (ctxt->input->cur[2] == '-') &&
10122 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010123 if ((!terminate) &&
10124 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10125 goto done;
10126#ifdef DEBUG_PUSH
10127 xmlGenericError(xmlGenericErrorContext,
10128 "PP: Parsing Comment\n");
10129#endif
10130 xmlParseComment(ctxt);
10131 ctxt->instate = XML_PARSER_MISC;
10132 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010133 (ctxt->input->cur[2] == 'D') &&
10134 (ctxt->input->cur[3] == 'O') &&
10135 (ctxt->input->cur[4] == 'C') &&
10136 (ctxt->input->cur[5] == 'T') &&
10137 (ctxt->input->cur[6] == 'Y') &&
10138 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010139 (ctxt->input->cur[8] == 'E')) {
10140 if ((!terminate) &&
10141 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10142 goto done;
10143#ifdef DEBUG_PUSH
10144 xmlGenericError(xmlGenericErrorContext,
10145 "PP: Parsing internal subset\n");
10146#endif
10147 ctxt->inSubset = 1;
10148 xmlParseDocTypeDecl(ctxt);
10149 if (RAW == '[') {
10150 ctxt->instate = XML_PARSER_DTD;
10151#ifdef DEBUG_PUSH
10152 xmlGenericError(xmlGenericErrorContext,
10153 "PP: entering DTD\n");
10154#endif
10155 } else {
10156 /*
10157 * Create and update the external subset.
10158 */
10159 ctxt->inSubset = 2;
10160 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10161 (ctxt->sax->externalSubset != NULL))
10162 ctxt->sax->externalSubset(ctxt->userData,
10163 ctxt->intSubName, ctxt->extSubSystem,
10164 ctxt->extSubURI);
10165 ctxt->inSubset = 0;
10166 ctxt->instate = XML_PARSER_PROLOG;
10167#ifdef DEBUG_PUSH
10168 xmlGenericError(xmlGenericErrorContext,
10169 "PP: entering PROLOG\n");
10170#endif
10171 }
10172 } else if ((cur == '<') && (next == '!') &&
10173 (avail < 9)) {
10174 goto done;
10175 } else {
10176 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010177 ctxt->progressive = 1;
10178 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010179#ifdef DEBUG_PUSH
10180 xmlGenericError(xmlGenericErrorContext,
10181 "PP: entering START_TAG\n");
10182#endif
10183 }
10184 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010185 case XML_PARSER_PROLOG:
10186 SKIP_BLANKS;
10187 if (ctxt->input->buf == NULL)
10188 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10189 else
10190 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10191 if (avail < 2)
10192 goto done;
10193 cur = ctxt->input->cur[0];
10194 next = ctxt->input->cur[1];
10195 if ((cur == '<') && (next == '?')) {
10196 if ((!terminate) &&
10197 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10198 goto done;
10199#ifdef DEBUG_PUSH
10200 xmlGenericError(xmlGenericErrorContext,
10201 "PP: Parsing PI\n");
10202#endif
10203 xmlParsePI(ctxt);
10204 } else if ((cur == '<') && (next == '!') &&
10205 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10206 if ((!terminate) &&
10207 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10208 goto done;
10209#ifdef DEBUG_PUSH
10210 xmlGenericError(xmlGenericErrorContext,
10211 "PP: Parsing Comment\n");
10212#endif
10213 xmlParseComment(ctxt);
10214 ctxt->instate = XML_PARSER_PROLOG;
10215 } else if ((cur == '<') && (next == '!') &&
10216 (avail < 4)) {
10217 goto done;
10218 } else {
10219 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010220 if (ctxt->progressive == 0)
10221 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010222 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010223#ifdef DEBUG_PUSH
10224 xmlGenericError(xmlGenericErrorContext,
10225 "PP: entering START_TAG\n");
10226#endif
10227 }
10228 break;
10229 case XML_PARSER_EPILOG:
10230 SKIP_BLANKS;
10231 if (ctxt->input->buf == NULL)
10232 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10233 else
10234 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10235 if (avail < 2)
10236 goto done;
10237 cur = ctxt->input->cur[0];
10238 next = ctxt->input->cur[1];
10239 if ((cur == '<') && (next == '?')) {
10240 if ((!terminate) &&
10241 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10242 goto done;
10243#ifdef DEBUG_PUSH
10244 xmlGenericError(xmlGenericErrorContext,
10245 "PP: Parsing PI\n");
10246#endif
10247 xmlParsePI(ctxt);
10248 ctxt->instate = XML_PARSER_EPILOG;
10249 } else if ((cur == '<') && (next == '!') &&
10250 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10251 if ((!terminate) &&
10252 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10253 goto done;
10254#ifdef DEBUG_PUSH
10255 xmlGenericError(xmlGenericErrorContext,
10256 "PP: Parsing Comment\n");
10257#endif
10258 xmlParseComment(ctxt);
10259 ctxt->instate = XML_PARSER_EPILOG;
10260 } else if ((cur == '<') && (next == '!') &&
10261 (avail < 4)) {
10262 goto done;
10263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010264 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010265 ctxt->instate = XML_PARSER_EOF;
10266#ifdef DEBUG_PUSH
10267 xmlGenericError(xmlGenericErrorContext,
10268 "PP: entering EOF\n");
10269#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010270 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010271 ctxt->sax->endDocument(ctxt->userData);
10272 goto done;
10273 }
10274 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010275 case XML_PARSER_DTD: {
10276 /*
10277 * Sorry but progressive parsing of the internal subset
10278 * is not expected to be supported. We first check that
10279 * the full content of the internal subset is available and
10280 * the parsing is launched only at that point.
10281 * Internal subset ends up with "']' S? '>'" in an unescaped
10282 * section and not in a ']]>' sequence which are conditional
10283 * sections (whoever argued to keep that crap in XML deserves
10284 * a place in hell !).
10285 */
10286 int base, i;
10287 xmlChar *buf;
10288 xmlChar quote = 0;
10289
10290 base = ctxt->input->cur - ctxt->input->base;
10291 if (base < 0) return(0);
10292 if (ctxt->checkIndex > base)
10293 base = ctxt->checkIndex;
10294 buf = ctxt->input->buf->buffer->content;
10295 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10296 base++) {
10297 if (quote != 0) {
10298 if (buf[base] == quote)
10299 quote = 0;
10300 continue;
10301 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010302 if ((quote == 0) && (buf[base] == '<')) {
10303 int found = 0;
10304 /* special handling of comments */
10305 if (((unsigned int) base + 4 <
10306 ctxt->input->buf->buffer->use) &&
10307 (buf[base + 1] == '!') &&
10308 (buf[base + 2] == '-') &&
10309 (buf[base + 3] == '-')) {
10310 for (;(unsigned int) base + 3 <
10311 ctxt->input->buf->buffer->use; base++) {
10312 if ((buf[base] == '-') &&
10313 (buf[base + 1] == '-') &&
10314 (buf[base + 2] == '>')) {
10315 found = 1;
10316 base += 2;
10317 break;
10318 }
10319 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010320 if (!found) {
10321#if 0
10322 fprintf(stderr, "unfinished comment\n");
10323#endif
10324 break; /* for */
10325 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010326 continue;
10327 }
10328 }
Owen Taylor3473f882001-02-23 17:55:21 +000010329 if (buf[base] == '"') {
10330 quote = '"';
10331 continue;
10332 }
10333 if (buf[base] == '\'') {
10334 quote = '\'';
10335 continue;
10336 }
10337 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010338#if 0
10339 fprintf(stderr, "%c%c%c%c: ", buf[base],
10340 buf[base + 1], buf[base + 2], buf[base + 3]);
10341#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010342 if ((unsigned int) base +1 >=
10343 ctxt->input->buf->buffer->use)
10344 break;
10345 if (buf[base + 1] == ']') {
10346 /* conditional crap, skip both ']' ! */
10347 base++;
10348 continue;
10349 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010350 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010351 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10352 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010353 if (buf[base + i] == '>') {
10354#if 0
10355 fprintf(stderr, "found\n");
10356#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010357 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010358 }
10359 if (!IS_BLANK_CH(buf[base + i])) {
10360#if 0
10361 fprintf(stderr, "not found\n");
10362#endif
10363 goto not_end_of_int_subset;
10364 }
Owen Taylor3473f882001-02-23 17:55:21 +000010365 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010366#if 0
10367 fprintf(stderr, "end of stream\n");
10368#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010369 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010370
Owen Taylor3473f882001-02-23 17:55:21 +000010371 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010372not_end_of_int_subset:
10373 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010374 }
10375 /*
10376 * We didn't find the end of the internal subset
10377 */
Owen Taylor3473f882001-02-23 17:55:21 +000010378#ifdef DEBUG_PUSH
10379 if (next == 0)
10380 xmlGenericError(xmlGenericErrorContext,
10381 "PP: lookup of int subset end filed\n");
10382#endif
10383 goto done;
10384
10385found_end_int_subset:
10386 xmlParseInternalSubset(ctxt);
10387 ctxt->inSubset = 2;
10388 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10389 (ctxt->sax->externalSubset != NULL))
10390 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10391 ctxt->extSubSystem, ctxt->extSubURI);
10392 ctxt->inSubset = 0;
10393 ctxt->instate = XML_PARSER_PROLOG;
10394 ctxt->checkIndex = 0;
10395#ifdef DEBUG_PUSH
10396 xmlGenericError(xmlGenericErrorContext,
10397 "PP: entering PROLOG\n");
10398#endif
10399 break;
10400 }
10401 case XML_PARSER_COMMENT:
10402 xmlGenericError(xmlGenericErrorContext,
10403 "PP: internal error, state == COMMENT\n");
10404 ctxt->instate = XML_PARSER_CONTENT;
10405#ifdef DEBUG_PUSH
10406 xmlGenericError(xmlGenericErrorContext,
10407 "PP: entering CONTENT\n");
10408#endif
10409 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010410 case XML_PARSER_IGNORE:
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: internal error, state == IGNORE");
10413 ctxt->instate = XML_PARSER_DTD;
10414#ifdef DEBUG_PUSH
10415 xmlGenericError(xmlGenericErrorContext,
10416 "PP: entering DTD\n");
10417#endif
10418 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010419 case XML_PARSER_PI:
10420 xmlGenericError(xmlGenericErrorContext,
10421 "PP: internal error, state == PI\n");
10422 ctxt->instate = XML_PARSER_CONTENT;
10423#ifdef DEBUG_PUSH
10424 xmlGenericError(xmlGenericErrorContext,
10425 "PP: entering CONTENT\n");
10426#endif
10427 break;
10428 case XML_PARSER_ENTITY_DECL:
10429 xmlGenericError(xmlGenericErrorContext,
10430 "PP: internal error, state == ENTITY_DECL\n");
10431 ctxt->instate = XML_PARSER_DTD;
10432#ifdef DEBUG_PUSH
10433 xmlGenericError(xmlGenericErrorContext,
10434 "PP: entering DTD\n");
10435#endif
10436 break;
10437 case XML_PARSER_ENTITY_VALUE:
10438 xmlGenericError(xmlGenericErrorContext,
10439 "PP: internal error, state == ENTITY_VALUE\n");
10440 ctxt->instate = XML_PARSER_CONTENT;
10441#ifdef DEBUG_PUSH
10442 xmlGenericError(xmlGenericErrorContext,
10443 "PP: entering DTD\n");
10444#endif
10445 break;
10446 case XML_PARSER_ATTRIBUTE_VALUE:
10447 xmlGenericError(xmlGenericErrorContext,
10448 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10449 ctxt->instate = XML_PARSER_START_TAG;
10450#ifdef DEBUG_PUSH
10451 xmlGenericError(xmlGenericErrorContext,
10452 "PP: entering START_TAG\n");
10453#endif
10454 break;
10455 case XML_PARSER_SYSTEM_LITERAL:
10456 xmlGenericError(xmlGenericErrorContext,
10457 "PP: internal error, state == SYSTEM_LITERAL\n");
10458 ctxt->instate = XML_PARSER_START_TAG;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: entering START_TAG\n");
10462#endif
10463 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010464 case XML_PARSER_PUBLIC_LITERAL:
10465 xmlGenericError(xmlGenericErrorContext,
10466 "PP: internal error, state == PUBLIC_LITERAL\n");
10467 ctxt->instate = XML_PARSER_START_TAG;
10468#ifdef DEBUG_PUSH
10469 xmlGenericError(xmlGenericErrorContext,
10470 "PP: entering START_TAG\n");
10471#endif
10472 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010473 }
10474 }
10475done:
10476#ifdef DEBUG_PUSH
10477 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10478#endif
10479 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010480encoding_error:
10481 {
10482 char buffer[150];
10483
10484 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10485 ctxt->input->cur[0], ctxt->input->cur[1],
10486 ctxt->input->cur[2], ctxt->input->cur[3]);
10487 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10488 "Input is not proper UTF-8, indicate encoding !\n%s",
10489 BAD_CAST buffer, NULL);
10490 }
10491 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010492}
10493
10494/**
Owen Taylor3473f882001-02-23 17:55:21 +000010495 * xmlParseChunk:
10496 * @ctxt: an XML parser context
10497 * @chunk: an char array
10498 * @size: the size in byte of the chunk
10499 * @terminate: last chunk indicator
10500 *
10501 * Parse a Chunk of memory
10502 *
10503 * Returns zero if no error, the xmlParserErrors otherwise.
10504 */
10505int
10506xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10507 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010508 int end_in_lf = 0;
10509
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010510 if (ctxt == NULL)
10511 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010512 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010513 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010514 if (ctxt->instate == XML_PARSER_START)
10515 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010516 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10517 (chunk[size - 1] == '\r')) {
10518 end_in_lf = 1;
10519 size--;
10520 }
Owen Taylor3473f882001-02-23 17:55:21 +000010521 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10522 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10523 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10524 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010525 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010526
William M. Bracka3215c72004-07-31 16:24:01 +000010527 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10528 if (res < 0) {
10529 ctxt->errNo = XML_PARSER_EOF;
10530 ctxt->disableSAX = 1;
10531 return (XML_PARSER_EOF);
10532 }
Owen Taylor3473f882001-02-23 17:55:21 +000010533 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10534 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010535 ctxt->input->end =
10536 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010537#ifdef DEBUG_PUSH
10538 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10539#endif
10540
Owen Taylor3473f882001-02-23 17:55:21 +000010541 } else if (ctxt->instate != XML_PARSER_EOF) {
10542 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10543 xmlParserInputBufferPtr in = ctxt->input->buf;
10544 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10545 (in->raw != NULL)) {
10546 int nbchars;
10547
10548 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10549 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010550 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010551 xmlGenericError(xmlGenericErrorContext,
10552 "xmlParseChunk: encoder error\n");
10553 return(XML_ERR_INVALID_ENCODING);
10554 }
10555 }
10556 }
10557 }
10558 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010559 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10560 (ctxt->input->buf != NULL)) {
10561 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10562 }
Daniel Veillard14412512005-01-21 23:53:26 +000010563 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010564 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010565 if (terminate) {
10566 /*
10567 * Check for termination
10568 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010569 int avail = 0;
10570
10571 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010572 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010573 avail = ctxt->input->length -
10574 (ctxt->input->cur - ctxt->input->base);
10575 else
10576 avail = ctxt->input->buf->buffer->use -
10577 (ctxt->input->cur - ctxt->input->base);
10578 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010579
Owen Taylor3473f882001-02-23 17:55:21 +000010580 if ((ctxt->instate != XML_PARSER_EOF) &&
10581 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010582 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010583 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010584 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010585 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010586 }
Owen Taylor3473f882001-02-23 17:55:21 +000010587 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010588 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010589 ctxt->sax->endDocument(ctxt->userData);
10590 }
10591 ctxt->instate = XML_PARSER_EOF;
10592 }
10593 return((xmlParserErrors) ctxt->errNo);
10594}
10595
10596/************************************************************************
10597 * *
10598 * I/O front end functions to the parser *
10599 * *
10600 ************************************************************************/
10601
10602/**
Owen Taylor3473f882001-02-23 17:55:21 +000010603 * xmlCreatePushParserCtxt:
10604 * @sax: a SAX handler
10605 * @user_data: The user data returned on SAX callbacks
10606 * @chunk: a pointer to an array of chars
10607 * @size: number of chars in the array
10608 * @filename: an optional file name or URI
10609 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010610 * Create a parser context for using the XML parser in push mode.
10611 * If @buffer and @size are non-NULL, the data is used to detect
10612 * the encoding. The remaining characters will be parsed so they
10613 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010614 * To allow content encoding detection, @size should be >= 4
10615 * The value of @filename is used for fetching external entities
10616 * and error/warning reports.
10617 *
10618 * Returns the new parser context or NULL
10619 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010620
Owen Taylor3473f882001-02-23 17:55:21 +000010621xmlParserCtxtPtr
10622xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10623 const char *chunk, int size, const char *filename) {
10624 xmlParserCtxtPtr ctxt;
10625 xmlParserInputPtr inputStream;
10626 xmlParserInputBufferPtr buf;
10627 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10628
10629 /*
10630 * plug some encoding conversion routines
10631 */
10632 if ((chunk != NULL) && (size >= 4))
10633 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10634
10635 buf = xmlAllocParserInputBuffer(enc);
10636 if (buf == NULL) return(NULL);
10637
10638 ctxt = xmlNewParserCtxt();
10639 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010640 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010641 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010642 return(NULL);
10643 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010644 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010645 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10646 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010647 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010648 xmlFreeParserInputBuffer(buf);
10649 xmlFreeParserCtxt(ctxt);
10650 return(NULL);
10651 }
Owen Taylor3473f882001-02-23 17:55:21 +000010652 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010653#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010654 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010655#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010656 xmlFree(ctxt->sax);
10657 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10658 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010659 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010660 xmlFreeParserInputBuffer(buf);
10661 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010662 return(NULL);
10663 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010664 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10665 if (sax->initialized == XML_SAX2_MAGIC)
10666 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10667 else
10668 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010669 if (user_data != NULL)
10670 ctxt->userData = user_data;
10671 }
10672 if (filename == NULL) {
10673 ctxt->directory = NULL;
10674 } else {
10675 ctxt->directory = xmlParserGetDirectory(filename);
10676 }
10677
10678 inputStream = xmlNewInputStream(ctxt);
10679 if (inputStream == NULL) {
10680 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010681 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010682 return(NULL);
10683 }
10684
10685 if (filename == NULL)
10686 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010687 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010688 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010689 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010690 if (inputStream->filename == NULL) {
10691 xmlFreeParserCtxt(ctxt);
10692 xmlFreeParserInputBuffer(buf);
10693 return(NULL);
10694 }
10695 }
Owen Taylor3473f882001-02-23 17:55:21 +000010696 inputStream->buf = buf;
10697 inputStream->base = inputStream->buf->buffer->content;
10698 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010699 inputStream->end =
10700 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010701
10702 inputPush(ctxt, inputStream);
10703
William M. Brack3a1cd212005-02-11 14:35:54 +000010704 /*
10705 * If the caller didn't provide an initial 'chunk' for determining
10706 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10707 * that it can be automatically determined later
10708 */
10709 if ((size == 0) || (chunk == NULL)) {
10710 ctxt->charset = XML_CHAR_ENCODING_NONE;
10711 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010712 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10713 int cur = ctxt->input->cur - ctxt->input->base;
10714
Owen Taylor3473f882001-02-23 17:55:21 +000010715 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010716
10717 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10718 ctxt->input->cur = ctxt->input->base + cur;
10719 ctxt->input->end =
10720 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010721#ifdef DEBUG_PUSH
10722 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10723#endif
10724 }
10725
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010726 if (enc != XML_CHAR_ENCODING_NONE) {
10727 xmlSwitchEncoding(ctxt, enc);
10728 }
10729
Owen Taylor3473f882001-02-23 17:55:21 +000010730 return(ctxt);
10731}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010732#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010733
10734/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010735 * xmlStopParser:
10736 * @ctxt: an XML parser context
10737 *
10738 * Blocks further parser processing
10739 */
10740void
10741xmlStopParser(xmlParserCtxtPtr ctxt) {
10742 if (ctxt == NULL)
10743 return;
10744 ctxt->instate = XML_PARSER_EOF;
10745 ctxt->disableSAX = 1;
10746 if (ctxt->input != NULL) {
10747 ctxt->input->cur = BAD_CAST"";
10748 ctxt->input->base = ctxt->input->cur;
10749 }
10750}
10751
10752/**
Owen Taylor3473f882001-02-23 17:55:21 +000010753 * xmlCreateIOParserCtxt:
10754 * @sax: a SAX handler
10755 * @user_data: The user data returned on SAX callbacks
10756 * @ioread: an I/O read function
10757 * @ioclose: an I/O close function
10758 * @ioctx: an I/O handler
10759 * @enc: the charset encoding if known
10760 *
10761 * Create a parser context for using the XML parser with an existing
10762 * I/O stream
10763 *
10764 * Returns the new parser context or NULL
10765 */
10766xmlParserCtxtPtr
10767xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10768 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10769 void *ioctx, xmlCharEncoding enc) {
10770 xmlParserCtxtPtr ctxt;
10771 xmlParserInputPtr inputStream;
10772 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010773
10774 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010775
10776 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10777 if (buf == NULL) return(NULL);
10778
10779 ctxt = xmlNewParserCtxt();
10780 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010781 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010782 return(NULL);
10783 }
10784 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010785#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010786 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010787#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010788 xmlFree(ctxt->sax);
10789 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10790 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010791 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010792 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010793 return(NULL);
10794 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010795 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10796 if (sax->initialized == XML_SAX2_MAGIC)
10797 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10798 else
10799 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010800 if (user_data != NULL)
10801 ctxt->userData = user_data;
10802 }
10803
10804 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10805 if (inputStream == NULL) {
10806 xmlFreeParserCtxt(ctxt);
10807 return(NULL);
10808 }
10809 inputPush(ctxt, inputStream);
10810
10811 return(ctxt);
10812}
10813
Daniel Veillard4432df22003-09-28 18:58:27 +000010814#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010815/************************************************************************
10816 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010817 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010818 * *
10819 ************************************************************************/
10820
10821/**
10822 * xmlIOParseDTD:
10823 * @sax: the SAX handler block or NULL
10824 * @input: an Input Buffer
10825 * @enc: the charset encoding if known
10826 *
10827 * Load and parse a DTD
10828 *
10829 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010830 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010831 */
10832
10833xmlDtdPtr
10834xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10835 xmlCharEncoding enc) {
10836 xmlDtdPtr ret = NULL;
10837 xmlParserCtxtPtr ctxt;
10838 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010839 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010840
10841 if (input == NULL)
10842 return(NULL);
10843
10844 ctxt = xmlNewParserCtxt();
10845 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010846 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010847 return(NULL);
10848 }
10849
10850 /*
10851 * Set-up the SAX context
10852 */
10853 if (sax != NULL) {
10854 if (ctxt->sax != NULL)
10855 xmlFree(ctxt->sax);
10856 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010857 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010858 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010859 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010860
10861 /*
10862 * generate a parser input from the I/O handler
10863 */
10864
Daniel Veillard43caefb2003-12-07 19:32:22 +000010865 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010866 if (pinput == NULL) {
10867 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010868 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010869 xmlFreeParserCtxt(ctxt);
10870 return(NULL);
10871 }
10872
10873 /*
10874 * plug some encoding conversion routines here.
10875 */
10876 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010877 if (enc != XML_CHAR_ENCODING_NONE) {
10878 xmlSwitchEncoding(ctxt, enc);
10879 }
Owen Taylor3473f882001-02-23 17:55:21 +000010880
10881 pinput->filename = NULL;
10882 pinput->line = 1;
10883 pinput->col = 1;
10884 pinput->base = ctxt->input->cur;
10885 pinput->cur = ctxt->input->cur;
10886 pinput->free = NULL;
10887
10888 /*
10889 * let's parse that entity knowing it's an external subset.
10890 */
10891 ctxt->inSubset = 2;
10892 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10893 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10894 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010895
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010896 if ((enc == XML_CHAR_ENCODING_NONE) &&
10897 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010898 /*
10899 * Get the 4 first bytes and decode the charset
10900 * if enc != XML_CHAR_ENCODING_NONE
10901 * plug some encoding conversion routines.
10902 */
10903 start[0] = RAW;
10904 start[1] = NXT(1);
10905 start[2] = NXT(2);
10906 start[3] = NXT(3);
10907 enc = xmlDetectCharEncoding(start, 4);
10908 if (enc != XML_CHAR_ENCODING_NONE) {
10909 xmlSwitchEncoding(ctxt, enc);
10910 }
10911 }
10912
Owen Taylor3473f882001-02-23 17:55:21 +000010913 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10914
10915 if (ctxt->myDoc != NULL) {
10916 if (ctxt->wellFormed) {
10917 ret = ctxt->myDoc->extSubset;
10918 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010919 if (ret != NULL) {
10920 xmlNodePtr tmp;
10921
10922 ret->doc = NULL;
10923 tmp = ret->children;
10924 while (tmp != NULL) {
10925 tmp->doc = NULL;
10926 tmp = tmp->next;
10927 }
10928 }
Owen Taylor3473f882001-02-23 17:55:21 +000010929 } else {
10930 ret = NULL;
10931 }
10932 xmlFreeDoc(ctxt->myDoc);
10933 ctxt->myDoc = NULL;
10934 }
10935 if (sax != NULL) ctxt->sax = NULL;
10936 xmlFreeParserCtxt(ctxt);
10937
10938 return(ret);
10939}
10940
10941/**
10942 * xmlSAXParseDTD:
10943 * @sax: the SAX handler block
10944 * @ExternalID: a NAME* containing the External ID of the DTD
10945 * @SystemID: a NAME* containing the URL to the DTD
10946 *
10947 * Load and parse an external subset.
10948 *
10949 * Returns the resulting xmlDtdPtr or NULL in case of error.
10950 */
10951
10952xmlDtdPtr
10953xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10954 const xmlChar *SystemID) {
10955 xmlDtdPtr ret = NULL;
10956 xmlParserCtxtPtr ctxt;
10957 xmlParserInputPtr input = NULL;
10958 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010959 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010960
10961 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10962
10963 ctxt = xmlNewParserCtxt();
10964 if (ctxt == NULL) {
10965 return(NULL);
10966 }
10967
10968 /*
10969 * Set-up the SAX context
10970 */
10971 if (sax != NULL) {
10972 if (ctxt->sax != NULL)
10973 xmlFree(ctxt->sax);
10974 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010975 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010976 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010977
10978 /*
10979 * Canonicalise the system ID
10980 */
10981 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010982 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010983 xmlFreeParserCtxt(ctxt);
10984 return(NULL);
10985 }
Owen Taylor3473f882001-02-23 17:55:21 +000010986
10987 /*
10988 * Ask the Entity resolver to load the damn thing
10989 */
10990
10991 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000010992 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
10993 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010994 if (input == NULL) {
10995 if (sax != NULL) ctxt->sax = NULL;
10996 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010997 if (systemIdCanonic != NULL)
10998 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010999 return(NULL);
11000 }
11001
11002 /*
11003 * plug some encoding conversion routines here.
11004 */
11005 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011006 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11008 xmlSwitchEncoding(ctxt, enc);
11009 }
Owen Taylor3473f882001-02-23 17:55:21 +000011010
11011 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011012 input->filename = (char *) systemIdCanonic;
11013 else
11014 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011015 input->line = 1;
11016 input->col = 1;
11017 input->base = ctxt->input->cur;
11018 input->cur = ctxt->input->cur;
11019 input->free = NULL;
11020
11021 /*
11022 * let's parse that entity knowing it's an external subset.
11023 */
11024 ctxt->inSubset = 2;
11025 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11026 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11027 ExternalID, SystemID);
11028 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11029
11030 if (ctxt->myDoc != NULL) {
11031 if (ctxt->wellFormed) {
11032 ret = ctxt->myDoc->extSubset;
11033 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011034 if (ret != NULL) {
11035 xmlNodePtr tmp;
11036
11037 ret->doc = NULL;
11038 tmp = ret->children;
11039 while (tmp != NULL) {
11040 tmp->doc = NULL;
11041 tmp = tmp->next;
11042 }
11043 }
Owen Taylor3473f882001-02-23 17:55:21 +000011044 } else {
11045 ret = NULL;
11046 }
11047 xmlFreeDoc(ctxt->myDoc);
11048 ctxt->myDoc = NULL;
11049 }
11050 if (sax != NULL) ctxt->sax = NULL;
11051 xmlFreeParserCtxt(ctxt);
11052
11053 return(ret);
11054}
11055
Daniel Veillard4432df22003-09-28 18:58:27 +000011056
Owen Taylor3473f882001-02-23 17:55:21 +000011057/**
11058 * xmlParseDTD:
11059 * @ExternalID: a NAME* containing the External ID of the DTD
11060 * @SystemID: a NAME* containing the URL to the DTD
11061 *
11062 * Load and parse an external subset.
11063 *
11064 * Returns the resulting xmlDtdPtr or NULL in case of error.
11065 */
11066
11067xmlDtdPtr
11068xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11069 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11070}
Daniel Veillard4432df22003-09-28 18:58:27 +000011071#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011072
11073/************************************************************************
11074 * *
11075 * Front ends when parsing an Entity *
11076 * *
11077 ************************************************************************/
11078
11079/**
Owen Taylor3473f882001-02-23 17:55:21 +000011080 * xmlParseCtxtExternalEntity:
11081 * @ctx: the existing parsing context
11082 * @URL: the URL for the entity to load
11083 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011084 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011085 *
11086 * Parse an external general entity within an existing parsing context
11087 * An external general parsed entity is well-formed if it matches the
11088 * production labeled extParsedEnt.
11089 *
11090 * [78] extParsedEnt ::= TextDecl? content
11091 *
11092 * Returns 0 if the entity is well formed, -1 in case of args problem and
11093 * the parser error code otherwise
11094 */
11095
11096int
11097xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011098 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011099 xmlParserCtxtPtr ctxt;
11100 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011101 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011102 xmlSAXHandlerPtr oldsax = NULL;
11103 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011104 xmlChar start[4];
11105 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011106 xmlParserInputPtr inputStream;
11107 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011108
Daniel Veillardce682bc2004-11-05 17:22:25 +000011109 if (ctx == NULL) return(-1);
11110
Owen Taylor3473f882001-02-23 17:55:21 +000011111 if (ctx->depth > 40) {
11112 return(XML_ERR_ENTITY_LOOP);
11113 }
11114
Daniel Veillardcda96922001-08-21 10:56:31 +000011115 if (lst != NULL)
11116 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011117 if ((URL == NULL) && (ID == NULL))
11118 return(-1);
11119 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11120 return(-1);
11121
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011122 ctxt = xmlNewParserCtxt();
11123 if (ctxt == NULL) {
11124 return(-1);
11125 }
11126
Owen Taylor3473f882001-02-23 17:55:21 +000011127 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011128 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011129
11130 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11131 if (inputStream == NULL) {
11132 xmlFreeParserCtxt(ctxt);
11133 return(-1);
11134 }
11135
11136 inputPush(ctxt, inputStream);
11137
11138 if ((ctxt->directory == NULL) && (directory == NULL))
11139 directory = xmlParserGetDirectory((char *)URL);
11140 if ((ctxt->directory == NULL) && (directory != NULL))
11141 ctxt->directory = directory;
11142
Owen Taylor3473f882001-02-23 17:55:21 +000011143 oldsax = ctxt->sax;
11144 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011145 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011146 newDoc = xmlNewDoc(BAD_CAST "1.0");
11147 if (newDoc == NULL) {
11148 xmlFreeParserCtxt(ctxt);
11149 return(-1);
11150 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011151 if (ctx->myDoc->dict) {
11152 newDoc->dict = ctx->myDoc->dict;
11153 xmlDictReference(newDoc->dict);
11154 }
Owen Taylor3473f882001-02-23 17:55:21 +000011155 if (ctx->myDoc != NULL) {
11156 newDoc->intSubset = ctx->myDoc->intSubset;
11157 newDoc->extSubset = ctx->myDoc->extSubset;
11158 }
11159 if (ctx->myDoc->URL != NULL) {
11160 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11161 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011162 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11163 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011164 ctxt->sax = oldsax;
11165 xmlFreeParserCtxt(ctxt);
11166 newDoc->intSubset = NULL;
11167 newDoc->extSubset = NULL;
11168 xmlFreeDoc(newDoc);
11169 return(-1);
11170 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011171 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011172 nodePush(ctxt, newDoc->children);
11173 if (ctx->myDoc == NULL) {
11174 ctxt->myDoc = newDoc;
11175 } else {
11176 ctxt->myDoc = ctx->myDoc;
11177 newDoc->children->doc = ctx->myDoc;
11178 }
11179
Daniel Veillard87a764e2001-06-20 17:41:10 +000011180 /*
11181 * Get the 4 first bytes and decode the charset
11182 * if enc != XML_CHAR_ENCODING_NONE
11183 * plug some encoding conversion routines.
11184 */
11185 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011186 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11187 start[0] = RAW;
11188 start[1] = NXT(1);
11189 start[2] = NXT(2);
11190 start[3] = NXT(3);
11191 enc = xmlDetectCharEncoding(start, 4);
11192 if (enc != XML_CHAR_ENCODING_NONE) {
11193 xmlSwitchEncoding(ctxt, enc);
11194 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011195 }
11196
Owen Taylor3473f882001-02-23 17:55:21 +000011197 /*
11198 * Parse a possible text declaration first
11199 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011200 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011201 xmlParseTextDecl(ctxt);
11202 }
11203
11204 /*
11205 * Doing validity checking on chunk doesn't make sense
11206 */
11207 ctxt->instate = XML_PARSER_CONTENT;
11208 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011209 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011210 ctxt->loadsubset = ctx->loadsubset;
11211 ctxt->depth = ctx->depth + 1;
11212 ctxt->replaceEntities = ctx->replaceEntities;
11213 if (ctxt->validate) {
11214 ctxt->vctxt.error = ctx->vctxt.error;
11215 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011216 } else {
11217 ctxt->vctxt.error = NULL;
11218 ctxt->vctxt.warning = NULL;
11219 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011220 ctxt->vctxt.nodeTab = NULL;
11221 ctxt->vctxt.nodeNr = 0;
11222 ctxt->vctxt.nodeMax = 0;
11223 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011224 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11225 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011226 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11227 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11228 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011229 ctxt->dictNames = ctx->dictNames;
11230 ctxt->attsDefault = ctx->attsDefault;
11231 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011232 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011233
11234 xmlParseContent(ctxt);
11235
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011236 ctx->validate = ctxt->validate;
11237 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011238 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011239 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011240 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011241 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011242 }
11243 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011244 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011245 }
11246
11247 if (!ctxt->wellFormed) {
11248 if (ctxt->errNo == 0)
11249 ret = 1;
11250 else
11251 ret = ctxt->errNo;
11252 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011253 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011254 xmlNodePtr cur;
11255
11256 /*
11257 * Return the newly created nodeset after unlinking it from
11258 * they pseudo parent.
11259 */
11260 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011261 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011262 while (cur != NULL) {
11263 cur->parent = NULL;
11264 cur = cur->next;
11265 }
11266 newDoc->children->children = NULL;
11267 }
11268 ret = 0;
11269 }
11270 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011271 ctxt->dict = NULL;
11272 ctxt->attsDefault = NULL;
11273 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011274 xmlFreeParserCtxt(ctxt);
11275 newDoc->intSubset = NULL;
11276 newDoc->extSubset = NULL;
11277 xmlFreeDoc(newDoc);
11278
11279 return(ret);
11280}
11281
11282/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011283 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011284 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011285 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011286 * @sax: the SAX handler bloc (possibly NULL)
11287 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11288 * @depth: Used for loop detection, use 0
11289 * @URL: the URL for the entity to load
11290 * @ID: the System ID for the entity to load
11291 * @list: the return value for the set of parsed nodes
11292 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011293 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011294 *
11295 * Returns 0 if the entity is well formed, -1 in case of args problem and
11296 * the parser error code otherwise
11297 */
11298
Daniel Veillard7d515752003-09-26 19:12:37 +000011299static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011300xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11301 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011302 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011303 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011304 xmlParserCtxtPtr ctxt;
11305 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011306 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011307 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011308 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011309 xmlChar start[4];
11310 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011311
11312 if (depth > 40) {
11313 return(XML_ERR_ENTITY_LOOP);
11314 }
11315
11316
11317
11318 if (list != NULL)
11319 *list = NULL;
11320 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011321 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011322 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011323 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011324
11325
11326 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011327 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011328 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011329 if (oldctxt != NULL) {
11330 ctxt->_private = oldctxt->_private;
11331 ctxt->loadsubset = oldctxt->loadsubset;
11332 ctxt->validate = oldctxt->validate;
11333 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011334 ctxt->record_info = oldctxt->record_info;
11335 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11336 ctxt->node_seq.length = oldctxt->node_seq.length;
11337 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011338 } else {
11339 /*
11340 * Doing validity checking on chunk without context
11341 * doesn't make sense
11342 */
11343 ctxt->_private = NULL;
11344 ctxt->validate = 0;
11345 ctxt->external = 2;
11346 ctxt->loadsubset = 0;
11347 }
Owen Taylor3473f882001-02-23 17:55:21 +000011348 if (sax != NULL) {
11349 oldsax = ctxt->sax;
11350 ctxt->sax = sax;
11351 if (user_data != NULL)
11352 ctxt->userData = user_data;
11353 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011354 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011355 newDoc = xmlNewDoc(BAD_CAST "1.0");
11356 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011357 ctxt->node_seq.maximum = 0;
11358 ctxt->node_seq.length = 0;
11359 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011360 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011361 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011362 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011363 newDoc->intSubset = doc->intSubset;
11364 newDoc->extSubset = doc->extSubset;
11365 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011366 xmlDictReference(newDoc->dict);
11367
Owen Taylor3473f882001-02-23 17:55:21 +000011368 if (doc->URL != NULL) {
11369 newDoc->URL = xmlStrdup(doc->URL);
11370 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011371 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11372 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011373 if (sax != NULL)
11374 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011375 ctxt->node_seq.maximum = 0;
11376 ctxt->node_seq.length = 0;
11377 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011378 xmlFreeParserCtxt(ctxt);
11379 newDoc->intSubset = NULL;
11380 newDoc->extSubset = NULL;
11381 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011382 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011383 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011384 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011385 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011386 ctxt->myDoc = doc;
11387 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011388
Daniel Veillard87a764e2001-06-20 17:41:10 +000011389 /*
11390 * Get the 4 first bytes and decode the charset
11391 * if enc != XML_CHAR_ENCODING_NONE
11392 * plug some encoding conversion routines.
11393 */
11394 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011395 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11396 start[0] = RAW;
11397 start[1] = NXT(1);
11398 start[2] = NXT(2);
11399 start[3] = NXT(3);
11400 enc = xmlDetectCharEncoding(start, 4);
11401 if (enc != XML_CHAR_ENCODING_NONE) {
11402 xmlSwitchEncoding(ctxt, enc);
11403 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011404 }
11405
Owen Taylor3473f882001-02-23 17:55:21 +000011406 /*
11407 * Parse a possible text declaration first
11408 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011409 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011410 xmlParseTextDecl(ctxt);
11411 }
11412
Owen Taylor3473f882001-02-23 17:55:21 +000011413 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011414 ctxt->depth = depth;
11415
11416 xmlParseContent(ctxt);
11417
Daniel Veillard561b7f82002-03-20 21:55:57 +000011418 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011419 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011420 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011421 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011422 }
11423 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011424 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011425 }
11426
11427 if (!ctxt->wellFormed) {
11428 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011429 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011430 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011431 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011432 } else {
11433 if (list != NULL) {
11434 xmlNodePtr cur;
11435
11436 /*
11437 * Return the newly created nodeset after unlinking it from
11438 * they pseudo parent.
11439 */
11440 cur = newDoc->children->children;
11441 *list = cur;
11442 while (cur != NULL) {
11443 cur->parent = NULL;
11444 cur = cur->next;
11445 }
11446 newDoc->children->children = NULL;
11447 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011448 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011449 }
11450 if (sax != NULL)
11451 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011452 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11453 oldctxt->node_seq.length = ctxt->node_seq.length;
11454 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011455 ctxt->node_seq.maximum = 0;
11456 ctxt->node_seq.length = 0;
11457 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011458 xmlFreeParserCtxt(ctxt);
11459 newDoc->intSubset = NULL;
11460 newDoc->extSubset = NULL;
11461 xmlFreeDoc(newDoc);
11462
11463 return(ret);
11464}
11465
Daniel Veillard81273902003-09-30 00:43:48 +000011466#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011467/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011468 * xmlParseExternalEntity:
11469 * @doc: the document the chunk pertains to
11470 * @sax: the SAX handler bloc (possibly NULL)
11471 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11472 * @depth: Used for loop detection, use 0
11473 * @URL: the URL for the entity to load
11474 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011475 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011476 *
11477 * Parse an external general entity
11478 * An external general parsed entity is well-formed if it matches the
11479 * production labeled extParsedEnt.
11480 *
11481 * [78] extParsedEnt ::= TextDecl? content
11482 *
11483 * Returns 0 if the entity is well formed, -1 in case of args problem and
11484 * the parser error code otherwise
11485 */
11486
11487int
11488xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011489 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011490 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011491 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011492}
11493
11494/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011495 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011496 * @doc: the document the chunk pertains to
11497 * @sax: the SAX handler bloc (possibly NULL)
11498 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11499 * @depth: Used for loop detection, use 0
11500 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011501 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011502 *
11503 * Parse a well-balanced chunk of an XML document
11504 * called by the parser
11505 * The allowed sequence for the Well Balanced Chunk is the one defined by
11506 * the content production in the XML grammar:
11507 *
11508 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11509 *
11510 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11511 * the parser error code otherwise
11512 */
11513
11514int
11515xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011516 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011517 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11518 depth, string, lst, 0 );
11519}
Daniel Veillard81273902003-09-30 00:43:48 +000011520#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011521
11522/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011523 * xmlParseBalancedChunkMemoryInternal:
11524 * @oldctxt: the existing parsing context
11525 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11526 * @user_data: the user data field for the parser context
11527 * @lst: the return value for the set of parsed nodes
11528 *
11529 *
11530 * Parse a well-balanced chunk of an XML document
11531 * called by the parser
11532 * The allowed sequence for the Well Balanced Chunk is the one defined by
11533 * the content production in the XML grammar:
11534 *
11535 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11536 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011537 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11538 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011539 *
11540 * In case recover is set to 1, the nodelist will not be empty even if
11541 * the parsed chunk is not well balanced.
11542 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011543static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011544xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11545 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11546 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011547 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011548 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011549 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011550 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011551 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011552 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011553 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011554
11555 if (oldctxt->depth > 40) {
11556 return(XML_ERR_ENTITY_LOOP);
11557 }
11558
11559
11560 if (lst != NULL)
11561 *lst = NULL;
11562 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011563 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011564
11565 size = xmlStrlen(string);
11566
11567 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011568 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011569 if (user_data != NULL)
11570 ctxt->userData = user_data;
11571 else
11572 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011573 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11574 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011575 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11576 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11577 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011578
11579 oldsax = ctxt->sax;
11580 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011581 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011582 ctxt->replaceEntities = oldctxt->replaceEntities;
11583 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011584
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011585 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011586 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011587 newDoc = xmlNewDoc(BAD_CAST "1.0");
11588 if (newDoc == NULL) {
11589 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011590 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011591 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011592 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011593 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011594 newDoc->dict = ctxt->dict;
11595 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011596 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011597 } else {
11598 ctxt->myDoc = oldctxt->myDoc;
11599 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011600 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011601 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011602 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11603 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011604 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011605 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011606 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011607 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011608 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011609 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011610 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011611 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011612 ctxt->myDoc->children = NULL;
11613 ctxt->myDoc->last = NULL;
11614 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011615 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011616 ctxt->instate = XML_PARSER_CONTENT;
11617 ctxt->depth = oldctxt->depth + 1;
11618
Daniel Veillard328f48c2002-11-15 15:24:34 +000011619 ctxt->validate = 0;
11620 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011621 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11622 /*
11623 * ID/IDREF registration will be done in xmlValidateElement below
11624 */
11625 ctxt->loadsubset |= XML_SKIP_IDS;
11626 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011627 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011628 ctxt->attsDefault = oldctxt->attsDefault;
11629 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011630
Daniel Veillard68e9e742002-11-16 15:35:11 +000011631 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011632 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011633 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011634 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011635 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011636 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011637 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011638 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011639 }
11640
11641 if (!ctxt->wellFormed) {
11642 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011643 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011644 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011645 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011646 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011647 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011648 }
11649
William M. Brack7b9154b2003-09-27 19:23:50 +000011650 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011651 xmlNodePtr cur;
11652
11653 /*
11654 * Return the newly created nodeset after unlinking it from
11655 * they pseudo parent.
11656 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011657 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011658 *lst = cur;
11659 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011660#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011661 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11662 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11663 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011664 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11665 oldctxt->myDoc, cur);
11666 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011667#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011668 cur->parent = NULL;
11669 cur = cur->next;
11670 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011671 ctxt->myDoc->children->children = NULL;
11672 }
11673 if (ctxt->myDoc != NULL) {
11674 xmlFreeNode(ctxt->myDoc->children);
11675 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011676 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011677 }
11678
11679 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011680 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011681 ctxt->attsDefault = NULL;
11682 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011683 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011684 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011685 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011686 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011687
11688 return(ret);
11689}
11690
Daniel Veillard29b17482004-08-16 00:39:03 +000011691/**
11692 * xmlParseInNodeContext:
11693 * @node: the context node
11694 * @data: the input string
11695 * @datalen: the input string length in bytes
11696 * @options: a combination of xmlParserOption
11697 * @lst: the return value for the set of parsed nodes
11698 *
11699 * Parse a well-balanced chunk of an XML document
11700 * within the context (DTD, namespaces, etc ...) of the given node.
11701 *
11702 * The allowed sequence for the data is a Well Balanced Chunk defined by
11703 * the content production in the XML grammar:
11704 *
11705 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11706 *
11707 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11708 * error code otherwise
11709 */
11710xmlParserErrors
11711xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11712 int options, xmlNodePtr *lst) {
11713#ifdef SAX2
11714 xmlParserCtxtPtr ctxt;
11715 xmlDocPtr doc = NULL;
11716 xmlNodePtr fake, cur;
11717 int nsnr = 0;
11718
11719 xmlParserErrors ret = XML_ERR_OK;
11720
11721 /*
11722 * check all input parameters, grab the document
11723 */
11724 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11725 return(XML_ERR_INTERNAL_ERROR);
11726 switch (node->type) {
11727 case XML_ELEMENT_NODE:
11728 case XML_ATTRIBUTE_NODE:
11729 case XML_TEXT_NODE:
11730 case XML_CDATA_SECTION_NODE:
11731 case XML_ENTITY_REF_NODE:
11732 case XML_PI_NODE:
11733 case XML_COMMENT_NODE:
11734 case XML_DOCUMENT_NODE:
11735 case XML_HTML_DOCUMENT_NODE:
11736 break;
11737 default:
11738 return(XML_ERR_INTERNAL_ERROR);
11739
11740 }
11741 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11742 (node->type != XML_DOCUMENT_NODE) &&
11743 (node->type != XML_HTML_DOCUMENT_NODE))
11744 node = node->parent;
11745 if (node == NULL)
11746 return(XML_ERR_INTERNAL_ERROR);
11747 if (node->type == XML_ELEMENT_NODE)
11748 doc = node->doc;
11749 else
11750 doc = (xmlDocPtr) node;
11751 if (doc == NULL)
11752 return(XML_ERR_INTERNAL_ERROR);
11753
11754 /*
11755 * allocate a context and set-up everything not related to the
11756 * node position in the tree
11757 */
11758 if (doc->type == XML_DOCUMENT_NODE)
11759 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11760#ifdef LIBXML_HTML_ENABLED
11761 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11762 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11763#endif
11764 else
11765 return(XML_ERR_INTERNAL_ERROR);
11766
11767 if (ctxt == NULL)
11768 return(XML_ERR_NO_MEMORY);
11769 fake = xmlNewComment(NULL);
11770 if (fake == NULL) {
11771 xmlFreeParserCtxt(ctxt);
11772 return(XML_ERR_NO_MEMORY);
11773 }
11774 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011775
11776 /*
11777 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11778 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11779 * we must wait until the last moment to free the original one.
11780 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011781 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011782 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011783 xmlDictFree(ctxt->dict);
11784 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011785 } else
11786 options |= XML_PARSE_NODICT;
11787
11788 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011789 xmlDetectSAX2(ctxt);
11790 ctxt->myDoc = doc;
11791
11792 if (node->type == XML_ELEMENT_NODE) {
11793 nodePush(ctxt, node);
11794 /*
11795 * initialize the SAX2 namespaces stack
11796 */
11797 cur = node;
11798 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11799 xmlNsPtr ns = cur->nsDef;
11800 const xmlChar *iprefix, *ihref;
11801
11802 while (ns != NULL) {
11803 if (ctxt->dict) {
11804 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11805 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11806 } else {
11807 iprefix = ns->prefix;
11808 ihref = ns->href;
11809 }
11810
11811 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11812 nsPush(ctxt, iprefix, ihref);
11813 nsnr++;
11814 }
11815 ns = ns->next;
11816 }
11817 cur = cur->parent;
11818 }
11819 ctxt->instate = XML_PARSER_CONTENT;
11820 }
11821
11822 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11823 /*
11824 * ID/IDREF registration will be done in xmlValidateElement below
11825 */
11826 ctxt->loadsubset |= XML_SKIP_IDS;
11827 }
11828
Daniel Veillard499cc922006-01-18 17:22:35 +000011829#ifdef LIBXML_HTML_ENABLED
11830 if (doc->type == XML_HTML_DOCUMENT_NODE)
11831 __htmlParseContent(ctxt);
11832 else
11833#endif
11834 xmlParseContent(ctxt);
11835
Daniel Veillard29b17482004-08-16 00:39:03 +000011836 nsPop(ctxt, nsnr);
11837 if ((RAW == '<') && (NXT(1) == '/')) {
11838 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11839 } else if (RAW != 0) {
11840 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11841 }
11842 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11843 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11844 ctxt->wellFormed = 0;
11845 }
11846
11847 if (!ctxt->wellFormed) {
11848 if (ctxt->errNo == 0)
11849 ret = XML_ERR_INTERNAL_ERROR;
11850 else
11851 ret = (xmlParserErrors)ctxt->errNo;
11852 } else {
11853 ret = XML_ERR_OK;
11854 }
11855
11856 /*
11857 * Return the newly created nodeset after unlinking it from
11858 * the pseudo sibling.
11859 */
11860
11861 cur = fake->next;
11862 fake->next = NULL;
11863 node->last = fake;
11864
11865 if (cur != NULL) {
11866 cur->prev = NULL;
11867 }
11868
11869 *lst = cur;
11870
11871 while (cur != NULL) {
11872 cur->parent = NULL;
11873 cur = cur->next;
11874 }
11875
11876 xmlUnlinkNode(fake);
11877 xmlFreeNode(fake);
11878
11879
11880 if (ret != XML_ERR_OK) {
11881 xmlFreeNodeList(*lst);
11882 *lst = NULL;
11883 }
William M. Brackc3f81342004-10-03 01:22:44 +000011884
William M. Brackb7b54de2004-10-06 16:38:01 +000011885 if (doc->dict != NULL)
11886 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011887 xmlFreeParserCtxt(ctxt);
11888
11889 return(ret);
11890#else /* !SAX2 */
11891 return(XML_ERR_INTERNAL_ERROR);
11892#endif
11893}
11894
Daniel Veillard81273902003-09-30 00:43:48 +000011895#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011896/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011897 * xmlParseBalancedChunkMemoryRecover:
11898 * @doc: the document the chunk pertains to
11899 * @sax: the SAX handler bloc (possibly NULL)
11900 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11901 * @depth: Used for loop detection, use 0
11902 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11903 * @lst: the return value for the set of parsed nodes
11904 * @recover: return nodes even if the data is broken (use 0)
11905 *
11906 *
11907 * Parse a well-balanced chunk of an XML document
11908 * called by the parser
11909 * The allowed sequence for the Well Balanced Chunk is the one defined by
11910 * the content production in the XML grammar:
11911 *
11912 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11913 *
11914 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11915 * the parser error code otherwise
11916 *
11917 * In case recover is set to 1, the nodelist will not be empty even if
11918 * the parsed chunk is not well balanced.
11919 */
11920int
11921xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11922 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11923 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011924 xmlParserCtxtPtr ctxt;
11925 xmlDocPtr newDoc;
11926 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011927 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011928 int size;
11929 int ret = 0;
11930
11931 if (depth > 40) {
11932 return(XML_ERR_ENTITY_LOOP);
11933 }
11934
11935
Daniel Veillardcda96922001-08-21 10:56:31 +000011936 if (lst != NULL)
11937 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011938 if (string == NULL)
11939 return(-1);
11940
11941 size = xmlStrlen(string);
11942
11943 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11944 if (ctxt == NULL) return(-1);
11945 ctxt->userData = ctxt;
11946 if (sax != NULL) {
11947 oldsax = ctxt->sax;
11948 ctxt->sax = sax;
11949 if (user_data != NULL)
11950 ctxt->userData = user_data;
11951 }
11952 newDoc = xmlNewDoc(BAD_CAST "1.0");
11953 if (newDoc == NULL) {
11954 xmlFreeParserCtxt(ctxt);
11955 return(-1);
11956 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011957 if ((doc != NULL) && (doc->dict != NULL)) {
11958 xmlDictFree(ctxt->dict);
11959 ctxt->dict = doc->dict;
11960 xmlDictReference(ctxt->dict);
11961 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11962 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11963 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11964 ctxt->dictNames = 1;
11965 } else {
11966 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11967 }
Owen Taylor3473f882001-02-23 17:55:21 +000011968 if (doc != NULL) {
11969 newDoc->intSubset = doc->intSubset;
11970 newDoc->extSubset = doc->extSubset;
11971 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011972 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11973 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011974 if (sax != NULL)
11975 ctxt->sax = oldsax;
11976 xmlFreeParserCtxt(ctxt);
11977 newDoc->intSubset = NULL;
11978 newDoc->extSubset = NULL;
11979 xmlFreeDoc(newDoc);
11980 return(-1);
11981 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011982 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11983 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011984 if (doc == NULL) {
11985 ctxt->myDoc = newDoc;
11986 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011987 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011988 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000011989 /* Ensure that doc has XML spec namespace */
11990 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
11991 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000011992 }
11993 ctxt->instate = XML_PARSER_CONTENT;
11994 ctxt->depth = depth;
11995
11996 /*
11997 * Doing validity checking on chunk doesn't make sense
11998 */
11999 ctxt->validate = 0;
12000 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012001 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012002
Daniel Veillardb39bc392002-10-26 19:29:51 +000012003 if ( doc != NULL ){
12004 content = doc->children;
12005 doc->children = NULL;
12006 xmlParseContent(ctxt);
12007 doc->children = content;
12008 }
12009 else {
12010 xmlParseContent(ctxt);
12011 }
Owen Taylor3473f882001-02-23 17:55:21 +000012012 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012013 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012014 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012015 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012016 }
12017 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012018 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012019 }
12020
12021 if (!ctxt->wellFormed) {
12022 if (ctxt->errNo == 0)
12023 ret = 1;
12024 else
12025 ret = ctxt->errNo;
12026 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012027 ret = 0;
12028 }
12029
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012030 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12031 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012032
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012033 /*
12034 * Return the newly created nodeset after unlinking it from
12035 * they pseudo parent.
12036 */
12037 cur = newDoc->children->children;
12038 *lst = cur;
12039 while (cur != NULL) {
12040 xmlSetTreeDoc(cur, doc);
12041 cur->parent = NULL;
12042 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012043 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012044 newDoc->children->children = NULL;
12045 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012046
Owen Taylor3473f882001-02-23 17:55:21 +000012047 if (sax != NULL)
12048 ctxt->sax = oldsax;
12049 xmlFreeParserCtxt(ctxt);
12050 newDoc->intSubset = NULL;
12051 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012052 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012053 xmlFreeDoc(newDoc);
12054
12055 return(ret);
12056}
12057
12058/**
12059 * xmlSAXParseEntity:
12060 * @sax: the SAX handler block
12061 * @filename: the filename
12062 *
12063 * parse an XML external entity out of context and build a tree.
12064 * It use the given SAX function block to handle the parsing callback.
12065 * If sax is NULL, fallback to the default DOM tree building routines.
12066 *
12067 * [78] extParsedEnt ::= TextDecl? content
12068 *
12069 * This correspond to a "Well Balanced" chunk
12070 *
12071 * Returns the resulting document tree
12072 */
12073
12074xmlDocPtr
12075xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12076 xmlDocPtr ret;
12077 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012078
12079 ctxt = xmlCreateFileParserCtxt(filename);
12080 if (ctxt == NULL) {
12081 return(NULL);
12082 }
12083 if (sax != NULL) {
12084 if (ctxt->sax != NULL)
12085 xmlFree(ctxt->sax);
12086 ctxt->sax = sax;
12087 ctxt->userData = NULL;
12088 }
12089
Owen Taylor3473f882001-02-23 17:55:21 +000012090 xmlParseExtParsedEnt(ctxt);
12091
12092 if (ctxt->wellFormed)
12093 ret = ctxt->myDoc;
12094 else {
12095 ret = NULL;
12096 xmlFreeDoc(ctxt->myDoc);
12097 ctxt->myDoc = NULL;
12098 }
12099 if (sax != NULL)
12100 ctxt->sax = NULL;
12101 xmlFreeParserCtxt(ctxt);
12102
12103 return(ret);
12104}
12105
12106/**
12107 * xmlParseEntity:
12108 * @filename: the filename
12109 *
12110 * parse an XML external entity out of context and build a tree.
12111 *
12112 * [78] extParsedEnt ::= TextDecl? content
12113 *
12114 * This correspond to a "Well Balanced" chunk
12115 *
12116 * Returns the resulting document tree
12117 */
12118
12119xmlDocPtr
12120xmlParseEntity(const char *filename) {
12121 return(xmlSAXParseEntity(NULL, filename));
12122}
Daniel Veillard81273902003-09-30 00:43:48 +000012123#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012124
12125/**
12126 * xmlCreateEntityParserCtxt:
12127 * @URL: the entity URL
12128 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012129 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012130 *
12131 * Create a parser context for an external entity
12132 * Automatic support for ZLIB/Compress compressed document is provided
12133 * by default if found at compile-time.
12134 *
12135 * Returns the new parser context or NULL
12136 */
12137xmlParserCtxtPtr
12138xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12139 const xmlChar *base) {
12140 xmlParserCtxtPtr ctxt;
12141 xmlParserInputPtr inputStream;
12142 char *directory = NULL;
12143 xmlChar *uri;
12144
12145 ctxt = xmlNewParserCtxt();
12146 if (ctxt == NULL) {
12147 return(NULL);
12148 }
12149
12150 uri = xmlBuildURI(URL, base);
12151
12152 if (uri == NULL) {
12153 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12154 if (inputStream == NULL) {
12155 xmlFreeParserCtxt(ctxt);
12156 return(NULL);
12157 }
12158
12159 inputPush(ctxt, inputStream);
12160
12161 if ((ctxt->directory == NULL) && (directory == NULL))
12162 directory = xmlParserGetDirectory((char *)URL);
12163 if ((ctxt->directory == NULL) && (directory != NULL))
12164 ctxt->directory = directory;
12165 } else {
12166 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12167 if (inputStream == NULL) {
12168 xmlFree(uri);
12169 xmlFreeParserCtxt(ctxt);
12170 return(NULL);
12171 }
12172
12173 inputPush(ctxt, inputStream);
12174
12175 if ((ctxt->directory == NULL) && (directory == NULL))
12176 directory = xmlParserGetDirectory((char *)uri);
12177 if ((ctxt->directory == NULL) && (directory != NULL))
12178 ctxt->directory = directory;
12179 xmlFree(uri);
12180 }
Owen Taylor3473f882001-02-23 17:55:21 +000012181 return(ctxt);
12182}
12183
12184/************************************************************************
12185 * *
12186 * Front ends when parsing from a file *
12187 * *
12188 ************************************************************************/
12189
12190/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012191 * xmlCreateURLParserCtxt:
12192 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012193 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012194 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012195 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012196 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012197 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012198 *
12199 * Returns the new parser context or NULL
12200 */
12201xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012202xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012203{
12204 xmlParserCtxtPtr ctxt;
12205 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012206 char *directory = NULL;
12207
Owen Taylor3473f882001-02-23 17:55:21 +000012208 ctxt = xmlNewParserCtxt();
12209 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012210 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012211 return(NULL);
12212 }
12213
Daniel Veillarddf292f72005-01-16 19:00:15 +000012214 if (options)
12215 xmlCtxtUseOptions(ctxt, options);
12216 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012217
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012218 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012219 if (inputStream == NULL) {
12220 xmlFreeParserCtxt(ctxt);
12221 return(NULL);
12222 }
12223
Owen Taylor3473f882001-02-23 17:55:21 +000012224 inputPush(ctxt, inputStream);
12225 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012226 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012227 if ((ctxt->directory == NULL) && (directory != NULL))
12228 ctxt->directory = directory;
12229
12230 return(ctxt);
12231}
12232
Daniel Veillard61b93382003-11-03 14:28:31 +000012233/**
12234 * xmlCreateFileParserCtxt:
12235 * @filename: the filename
12236 *
12237 * Create a parser context for a file content.
12238 * Automatic support for ZLIB/Compress compressed document is provided
12239 * by default if found at compile-time.
12240 *
12241 * Returns the new parser context or NULL
12242 */
12243xmlParserCtxtPtr
12244xmlCreateFileParserCtxt(const char *filename)
12245{
12246 return(xmlCreateURLParserCtxt(filename, 0));
12247}
12248
Daniel Veillard81273902003-09-30 00:43:48 +000012249#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012250/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012251 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012252 * @sax: the SAX handler block
12253 * @filename: the filename
12254 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12255 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012256 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012257 *
12258 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12259 * compressed document is provided by default if found at compile-time.
12260 * It use the given SAX function block to handle the parsing callback.
12261 * If sax is NULL, fallback to the default DOM tree building routines.
12262 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012263 * User data (void *) is stored within the parser context in the
12264 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012265 *
Owen Taylor3473f882001-02-23 17:55:21 +000012266 * Returns the resulting document tree
12267 */
12268
12269xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012270xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12271 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012272 xmlDocPtr ret;
12273 xmlParserCtxtPtr ctxt;
12274 char *directory = NULL;
12275
Daniel Veillard635ef722001-10-29 11:48:19 +000012276 xmlInitParser();
12277
Owen Taylor3473f882001-02-23 17:55:21 +000012278 ctxt = xmlCreateFileParserCtxt(filename);
12279 if (ctxt == NULL) {
12280 return(NULL);
12281 }
12282 if (sax != NULL) {
12283 if (ctxt->sax != NULL)
12284 xmlFree(ctxt->sax);
12285 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012286 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012287 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012288 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012289 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012290 }
Owen Taylor3473f882001-02-23 17:55:21 +000012291
12292 if ((ctxt->directory == NULL) && (directory == NULL))
12293 directory = xmlParserGetDirectory(filename);
12294 if ((ctxt->directory == NULL) && (directory != NULL))
12295 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12296
Daniel Veillarddad3f682002-11-17 16:47:27 +000012297 ctxt->recovery = recovery;
12298
Owen Taylor3473f882001-02-23 17:55:21 +000012299 xmlParseDocument(ctxt);
12300
William M. Brackc07329e2003-09-08 01:57:30 +000012301 if ((ctxt->wellFormed) || recovery) {
12302 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012303 if (ret != NULL) {
12304 if (ctxt->input->buf->compressed > 0)
12305 ret->compression = 9;
12306 else
12307 ret->compression = ctxt->input->buf->compressed;
12308 }
William M. Brackc07329e2003-09-08 01:57:30 +000012309 }
Owen Taylor3473f882001-02-23 17:55:21 +000012310 else {
12311 ret = NULL;
12312 xmlFreeDoc(ctxt->myDoc);
12313 ctxt->myDoc = NULL;
12314 }
12315 if (sax != NULL)
12316 ctxt->sax = NULL;
12317 xmlFreeParserCtxt(ctxt);
12318
12319 return(ret);
12320}
12321
12322/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012323 * xmlSAXParseFile:
12324 * @sax: the SAX handler block
12325 * @filename: the filename
12326 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12327 * documents
12328 *
12329 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12330 * compressed document is provided by default if found at compile-time.
12331 * It use the given SAX function block to handle the parsing callback.
12332 * If sax is NULL, fallback to the default DOM tree building routines.
12333 *
12334 * Returns the resulting document tree
12335 */
12336
12337xmlDocPtr
12338xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12339 int recovery) {
12340 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12341}
12342
12343/**
Owen Taylor3473f882001-02-23 17:55:21 +000012344 * xmlRecoverDoc:
12345 * @cur: a pointer to an array of xmlChar
12346 *
12347 * parse an XML in-memory document and build a tree.
12348 * In the case the document is not Well Formed, a tree is built anyway
12349 *
12350 * Returns the resulting document tree
12351 */
12352
12353xmlDocPtr
12354xmlRecoverDoc(xmlChar *cur) {
12355 return(xmlSAXParseDoc(NULL, cur, 1));
12356}
12357
12358/**
12359 * xmlParseFile:
12360 * @filename: the filename
12361 *
12362 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12363 * compressed document is provided by default if found at compile-time.
12364 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012365 * Returns the resulting document tree if the file was wellformed,
12366 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012367 */
12368
12369xmlDocPtr
12370xmlParseFile(const char *filename) {
12371 return(xmlSAXParseFile(NULL, filename, 0));
12372}
12373
12374/**
12375 * xmlRecoverFile:
12376 * @filename: the filename
12377 *
12378 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12379 * compressed document is provided by default if found at compile-time.
12380 * In the case the document is not Well Formed, a tree is built anyway
12381 *
12382 * Returns the resulting document tree
12383 */
12384
12385xmlDocPtr
12386xmlRecoverFile(const char *filename) {
12387 return(xmlSAXParseFile(NULL, filename, 1));
12388}
12389
12390
12391/**
12392 * xmlSetupParserForBuffer:
12393 * @ctxt: an XML parser context
12394 * @buffer: a xmlChar * buffer
12395 * @filename: a file name
12396 *
12397 * Setup the parser context to parse a new buffer; Clears any prior
12398 * contents from the parser context. The buffer parameter must not be
12399 * NULL, but the filename parameter can be
12400 */
12401void
12402xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12403 const char* filename)
12404{
12405 xmlParserInputPtr input;
12406
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012407 if ((ctxt == NULL) || (buffer == NULL))
12408 return;
12409
Owen Taylor3473f882001-02-23 17:55:21 +000012410 input = xmlNewInputStream(ctxt);
12411 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012412 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012413 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012414 return;
12415 }
12416
12417 xmlClearParserCtxt(ctxt);
12418 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012419 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012420 input->base = buffer;
12421 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012422 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012423 inputPush(ctxt, input);
12424}
12425
12426/**
12427 * xmlSAXUserParseFile:
12428 * @sax: a SAX handler
12429 * @user_data: The user data returned on SAX callbacks
12430 * @filename: a file name
12431 *
12432 * parse an XML file and call the given SAX handler routines.
12433 * Automatic support for ZLIB/Compress compressed document is provided
12434 *
12435 * Returns 0 in case of success or a error number otherwise
12436 */
12437int
12438xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12439 const char *filename) {
12440 int ret = 0;
12441 xmlParserCtxtPtr ctxt;
12442
12443 ctxt = xmlCreateFileParserCtxt(filename);
12444 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012445#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012446 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012447#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012448 xmlFree(ctxt->sax);
12449 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012450 xmlDetectSAX2(ctxt);
12451
Owen Taylor3473f882001-02-23 17:55:21 +000012452 if (user_data != NULL)
12453 ctxt->userData = user_data;
12454
12455 xmlParseDocument(ctxt);
12456
12457 if (ctxt->wellFormed)
12458 ret = 0;
12459 else {
12460 if (ctxt->errNo != 0)
12461 ret = ctxt->errNo;
12462 else
12463 ret = -1;
12464 }
12465 if (sax != NULL)
12466 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012467 if (ctxt->myDoc != NULL) {
12468 xmlFreeDoc(ctxt->myDoc);
12469 ctxt->myDoc = NULL;
12470 }
Owen Taylor3473f882001-02-23 17:55:21 +000012471 xmlFreeParserCtxt(ctxt);
12472
12473 return ret;
12474}
Daniel Veillard81273902003-09-30 00:43:48 +000012475#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012476
12477/************************************************************************
12478 * *
12479 * Front ends when parsing from memory *
12480 * *
12481 ************************************************************************/
12482
12483/**
12484 * xmlCreateMemoryParserCtxt:
12485 * @buffer: a pointer to a char array
12486 * @size: the size of the array
12487 *
12488 * Create a parser context for an XML in-memory document.
12489 *
12490 * Returns the new parser context or NULL
12491 */
12492xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012493xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012494 xmlParserCtxtPtr ctxt;
12495 xmlParserInputPtr input;
12496 xmlParserInputBufferPtr buf;
12497
12498 if (buffer == NULL)
12499 return(NULL);
12500 if (size <= 0)
12501 return(NULL);
12502
12503 ctxt = xmlNewParserCtxt();
12504 if (ctxt == NULL)
12505 return(NULL);
12506
Daniel Veillard53350552003-09-18 13:35:51 +000012507 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012508 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012509 if (buf == NULL) {
12510 xmlFreeParserCtxt(ctxt);
12511 return(NULL);
12512 }
Owen Taylor3473f882001-02-23 17:55:21 +000012513
12514 input = xmlNewInputStream(ctxt);
12515 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012516 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012517 xmlFreeParserCtxt(ctxt);
12518 return(NULL);
12519 }
12520
12521 input->filename = NULL;
12522 input->buf = buf;
12523 input->base = input->buf->buffer->content;
12524 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012525 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012526
12527 inputPush(ctxt, input);
12528 return(ctxt);
12529}
12530
Daniel Veillard81273902003-09-30 00:43:48 +000012531#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012532/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012533 * xmlSAXParseMemoryWithData:
12534 * @sax: the SAX handler block
12535 * @buffer: an pointer to a char array
12536 * @size: the size of the array
12537 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12538 * documents
12539 * @data: the userdata
12540 *
12541 * parse an XML in-memory block and use the given SAX function block
12542 * to handle the parsing callback. If sax is NULL, fallback to the default
12543 * DOM tree building routines.
12544 *
12545 * User data (void *) is stored within the parser context in the
12546 * context's _private member, so it is available nearly everywhere in libxml
12547 *
12548 * Returns the resulting document tree
12549 */
12550
12551xmlDocPtr
12552xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12553 int size, int recovery, void *data) {
12554 xmlDocPtr ret;
12555 xmlParserCtxtPtr ctxt;
12556
12557 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12558 if (ctxt == NULL) return(NULL);
12559 if (sax != NULL) {
12560 if (ctxt->sax != NULL)
12561 xmlFree(ctxt->sax);
12562 ctxt->sax = sax;
12563 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012564 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012565 if (data!=NULL) {
12566 ctxt->_private=data;
12567 }
12568
Daniel Veillardadba5f12003-04-04 16:09:01 +000012569 ctxt->recovery = recovery;
12570
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012571 xmlParseDocument(ctxt);
12572
12573 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12574 else {
12575 ret = NULL;
12576 xmlFreeDoc(ctxt->myDoc);
12577 ctxt->myDoc = NULL;
12578 }
12579 if (sax != NULL)
12580 ctxt->sax = NULL;
12581 xmlFreeParserCtxt(ctxt);
12582
12583 return(ret);
12584}
12585
12586/**
Owen Taylor3473f882001-02-23 17:55:21 +000012587 * xmlSAXParseMemory:
12588 * @sax: the SAX handler block
12589 * @buffer: an pointer to a char array
12590 * @size: the size of the array
12591 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12592 * documents
12593 *
12594 * parse an XML in-memory block and use the given SAX function block
12595 * to handle the parsing callback. If sax is NULL, fallback to the default
12596 * DOM tree building routines.
12597 *
12598 * Returns the resulting document tree
12599 */
12600xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012601xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12602 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012603 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012604}
12605
12606/**
12607 * xmlParseMemory:
12608 * @buffer: an pointer to a char array
12609 * @size: the size of the array
12610 *
12611 * parse an XML in-memory block and build a tree.
12612 *
12613 * Returns the resulting document tree
12614 */
12615
Daniel Veillard50822cb2001-07-26 20:05:51 +000012616xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012617 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12618}
12619
12620/**
12621 * xmlRecoverMemory:
12622 * @buffer: an pointer to a char array
12623 * @size: the size of the array
12624 *
12625 * parse an XML in-memory block and build a tree.
12626 * In the case the document is not Well Formed, a tree is built anyway
12627 *
12628 * Returns the resulting document tree
12629 */
12630
Daniel Veillard50822cb2001-07-26 20:05:51 +000012631xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012632 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12633}
12634
12635/**
12636 * xmlSAXUserParseMemory:
12637 * @sax: a SAX handler
12638 * @user_data: The user data returned on SAX callbacks
12639 * @buffer: an in-memory XML document input
12640 * @size: the length of the XML document in bytes
12641 *
12642 * A better SAX parsing routine.
12643 * parse an XML in-memory buffer and call the given SAX handler routines.
12644 *
12645 * Returns 0 in case of success or a error number otherwise
12646 */
12647int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012648 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012649 int ret = 0;
12650 xmlParserCtxtPtr ctxt;
12651 xmlSAXHandlerPtr oldsax = NULL;
12652
Daniel Veillard9e923512002-08-14 08:48:52 +000012653 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012654 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12655 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012656 oldsax = ctxt->sax;
12657 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012658 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012659 if (user_data != NULL)
12660 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012661
12662 xmlParseDocument(ctxt);
12663
12664 if (ctxt->wellFormed)
12665 ret = 0;
12666 else {
12667 if (ctxt->errNo != 0)
12668 ret = ctxt->errNo;
12669 else
12670 ret = -1;
12671 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012672 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012673 if (ctxt->myDoc != NULL) {
12674 xmlFreeDoc(ctxt->myDoc);
12675 ctxt->myDoc = NULL;
12676 }
Owen Taylor3473f882001-02-23 17:55:21 +000012677 xmlFreeParserCtxt(ctxt);
12678
12679 return ret;
12680}
Daniel Veillard81273902003-09-30 00:43:48 +000012681#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012682
12683/**
12684 * xmlCreateDocParserCtxt:
12685 * @cur: a pointer to an array of xmlChar
12686 *
12687 * Creates a parser context for an XML in-memory document.
12688 *
12689 * Returns the new parser context or NULL
12690 */
12691xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012692xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012693 int len;
12694
12695 if (cur == NULL)
12696 return(NULL);
12697 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012698 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012699}
12700
Daniel Veillard81273902003-09-30 00:43:48 +000012701#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012702/**
12703 * xmlSAXParseDoc:
12704 * @sax: the SAX handler block
12705 * @cur: a pointer to an array of xmlChar
12706 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12707 * documents
12708 *
12709 * parse an XML in-memory document and build a tree.
12710 * It use the given SAX function block to handle the parsing callback.
12711 * If sax is NULL, fallback to the default DOM tree building routines.
12712 *
12713 * Returns the resulting document tree
12714 */
12715
12716xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012717xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012718 xmlDocPtr ret;
12719 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012720 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012721
Daniel Veillard38936062004-11-04 17:45:11 +000012722 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012723
12724
12725 ctxt = xmlCreateDocParserCtxt(cur);
12726 if (ctxt == NULL) return(NULL);
12727 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012728 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012729 ctxt->sax = sax;
12730 ctxt->userData = NULL;
12731 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012732 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012733
12734 xmlParseDocument(ctxt);
12735 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12736 else {
12737 ret = NULL;
12738 xmlFreeDoc(ctxt->myDoc);
12739 ctxt->myDoc = NULL;
12740 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012741 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012742 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012743 xmlFreeParserCtxt(ctxt);
12744
12745 return(ret);
12746}
12747
12748/**
12749 * xmlParseDoc:
12750 * @cur: a pointer to an array of xmlChar
12751 *
12752 * parse an XML in-memory document and build a tree.
12753 *
12754 * Returns the resulting document tree
12755 */
12756
12757xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012758xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012759 return(xmlSAXParseDoc(NULL, cur, 0));
12760}
Daniel Veillard81273902003-09-30 00:43:48 +000012761#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012762
Daniel Veillard81273902003-09-30 00:43:48 +000012763#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012764/************************************************************************
12765 * *
12766 * Specific function to keep track of entities references *
12767 * and used by the XSLT debugger *
12768 * *
12769 ************************************************************************/
12770
12771static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12772
12773/**
12774 * xmlAddEntityReference:
12775 * @ent : A valid entity
12776 * @firstNode : A valid first node for children of entity
12777 * @lastNode : A valid last node of children entity
12778 *
12779 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12780 */
12781static void
12782xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12783 xmlNodePtr lastNode)
12784{
12785 if (xmlEntityRefFunc != NULL) {
12786 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12787 }
12788}
12789
12790
/**
 * xmlSetEntityReferenceFunc:
 * @func: A valid function
 *
 * Set the function to call back when an entity reference has been made.
 * The pointer is stored in the file-scope xmlEntityRefFunc variable and
 * replaces any previously registered function.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
Daniel Veillard81273902003-09-30 00:43:48 +000012802#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012803
12804/************************************************************************
12805 * *
12806 * Miscellaneous *
12807 * *
12808 ************************************************************************/
12809
12810#ifdef LIBXML_XPATH_ENABLED
12811#include <libxml/xpath.h>
12812#endif
12813
Daniel Veillardffa3c742005-07-21 13:24:09 +000012814extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012815static int xmlParserInitialized = 0;
12816
12817/**
12818 * xmlInitParser:
12819 *
12820 * Initialization function for the XML parser.
12821 * This is not reentrant. Call once before processing in case of
12822 * use in multithreaded programs.
12823 */
12824
12825void
12826xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012827 if (xmlParserInitialized != 0)
12828 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012829
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012830 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12831 (xmlGenericError == NULL))
12832 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012833 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012834 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012835 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012836 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012837 xmlDefaultSAXHandlerInit();
12838 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012839#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012840 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012841#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012842#ifdef LIBXML_HTML_ENABLED
12843 htmlInitAutoClose();
12844 htmlDefaultSAXHandlerInit();
12845#endif
12846#ifdef LIBXML_XPATH_ENABLED
12847 xmlXPathInit();
12848#endif
12849 xmlParserInitialized = 1;
12850}
12851
12852/**
12853 * xmlCleanupParser:
12854 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012855 * Cleanup function for the XML library. It tries to reclaim all
12856 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012857 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012858 * function should not prevent reusing the library but one should
12859 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012860 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012861 */
12862
12863void
12864xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012865 if (!xmlParserInitialized)
12866 return;
12867
Owen Taylor3473f882001-02-23 17:55:21 +000012868 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012869#ifdef LIBXML_CATALOG_ENABLED
12870 xmlCatalogCleanup();
12871#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012872 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012873 xmlCleanupInputCallbacks();
12874#ifdef LIBXML_OUTPUT_ENABLED
12875 xmlCleanupOutputCallbacks();
12876#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012877#ifdef LIBXML_SCHEMAS_ENABLED
12878 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012879 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012880#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012881 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012882 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012883 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012884 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012885 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012886}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012887
12888/************************************************************************
12889 * *
12890 * New set (2.6.0) of simpler and more flexible APIs *
12891 * *
12892 ************************************************************************/
12893
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; when it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and "DICT_FREE(x);" can safely be used as the body of
 * an if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12905
12906/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012907 * xmlCtxtReset:
12908 * @ctxt: an XML parser context
12909 *
12910 * Reset a parser context
12911 */
12912void
12913xmlCtxtReset(xmlParserCtxtPtr ctxt)
12914{
12915 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012916 xmlDictPtr dict;
12917
12918 if (ctxt == NULL)
12919 return;
12920
12921 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012922
12923 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12924 xmlFreeInputStream(input);
12925 }
12926 ctxt->inputNr = 0;
12927 ctxt->input = NULL;
12928
12929 ctxt->spaceNr = 0;
12930 ctxt->spaceTab[0] = -1;
12931 ctxt->space = &ctxt->spaceTab[0];
12932
12933
12934 ctxt->nodeNr = 0;
12935 ctxt->node = NULL;
12936
12937 ctxt->nameNr = 0;
12938 ctxt->name = NULL;
12939
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012940 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012941 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012942 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012943 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012944 DICT_FREE(ctxt->directory);
12945 ctxt->directory = NULL;
12946 DICT_FREE(ctxt->extSubURI);
12947 ctxt->extSubURI = NULL;
12948 DICT_FREE(ctxt->extSubSystem);
12949 ctxt->extSubSystem = NULL;
12950 if (ctxt->myDoc != NULL)
12951 xmlFreeDoc(ctxt->myDoc);
12952 ctxt->myDoc = NULL;
12953
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012954 ctxt->standalone = -1;
12955 ctxt->hasExternalSubset = 0;
12956 ctxt->hasPErefs = 0;
12957 ctxt->html = 0;
12958 ctxt->external = 0;
12959 ctxt->instate = XML_PARSER_START;
12960 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012961
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012962 ctxt->wellFormed = 1;
12963 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012964 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012965 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012966#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012967 ctxt->vctxt.userData = ctxt;
12968 ctxt->vctxt.error = xmlParserValidityError;
12969 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012970#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012971 ctxt->record_info = 0;
12972 ctxt->nbChars = 0;
12973 ctxt->checkIndex = 0;
12974 ctxt->inSubset = 0;
12975 ctxt->errNo = XML_ERR_OK;
12976 ctxt->depth = 0;
12977 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12978 ctxt->catalogs = NULL;
12979 xmlInitNodeInfoSeq(&ctxt->node_seq);
12980
12981 if (ctxt->attsDefault != NULL) {
12982 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12983 ctxt->attsDefault = NULL;
12984 }
12985 if (ctxt->attsSpecial != NULL) {
12986 xmlHashFree(ctxt->attsSpecial, NULL);
12987 ctxt->attsSpecial = NULL;
12988 }
12989
Daniel Veillard4432df22003-09-28 18:58:27 +000012990#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012991 if (ctxt->catalogs != NULL)
12992 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012993#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012994 if (ctxt->lastError.code != XML_ERR_OK)
12995 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012996}
12997
12998/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012999 * xmlCtxtResetPush:
13000 * @ctxt: an XML parser context
13001 * @chunk: a pointer to an array of chars
13002 * @size: number of chars in the array
13003 * @filename: an optional file name or URI
13004 * @encoding: the document encoding, or NULL
13005 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013006 * Reset a push parser context
13007 *
13008 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013009 */
13010int
13011xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13012 int size, const char *filename, const char *encoding)
13013{
13014 xmlParserInputPtr inputStream;
13015 xmlParserInputBufferPtr buf;
13016 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13017
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013018 if (ctxt == NULL)
13019 return(1);
13020
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013021 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13022 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13023
13024 buf = xmlAllocParserInputBuffer(enc);
13025 if (buf == NULL)
13026 return(1);
13027
13028 if (ctxt == NULL) {
13029 xmlFreeParserInputBuffer(buf);
13030 return(1);
13031 }
13032
13033 xmlCtxtReset(ctxt);
13034
13035 if (ctxt->pushTab == NULL) {
13036 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13037 sizeof(xmlChar *));
13038 if (ctxt->pushTab == NULL) {
13039 xmlErrMemory(ctxt, NULL);
13040 xmlFreeParserInputBuffer(buf);
13041 return(1);
13042 }
13043 }
13044
13045 if (filename == NULL) {
13046 ctxt->directory = NULL;
13047 } else {
13048 ctxt->directory = xmlParserGetDirectory(filename);
13049 }
13050
13051 inputStream = xmlNewInputStream(ctxt);
13052 if (inputStream == NULL) {
13053 xmlFreeParserInputBuffer(buf);
13054 return(1);
13055 }
13056
13057 if (filename == NULL)
13058 inputStream->filename = NULL;
13059 else
13060 inputStream->filename = (char *)
13061 xmlCanonicPath((const xmlChar *) filename);
13062 inputStream->buf = buf;
13063 inputStream->base = inputStream->buf->buffer->content;
13064 inputStream->cur = inputStream->buf->buffer->content;
13065 inputStream->end =
13066 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13067
13068 inputPush(ctxt, inputStream);
13069
13070 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13071 (ctxt->input->buf != NULL)) {
13072 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13073 int cur = ctxt->input->cur - ctxt->input->base;
13074
13075 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13076
13077 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13078 ctxt->input->cur = ctxt->input->base + cur;
13079 ctxt->input->end =
13080 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13081 use];
13082#ifdef DEBUG_PUSH
13083 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13084#endif
13085 }
13086
13087 if (encoding != NULL) {
13088 xmlCharEncodingHandlerPtr hdlr;
13089
13090 hdlr = xmlFindCharEncodingHandler(encoding);
13091 if (hdlr != NULL) {
13092 xmlSwitchToEncoding(ctxt, hdlr);
13093 } else {
13094 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13095 "Unsupported encoding %s\n", BAD_CAST encoding);
13096 }
13097 } else if (enc != XML_CHAR_ENCODING_NONE) {
13098 xmlSwitchEncoding(ctxt, enc);
13099 }
13100
13101 return(0);
13102}
13103
13104/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013105 * xmlCtxtUseOptions:
13106 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013107 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013108 *
13109 * Applies the options to the parser context
13110 *
13111 * Returns 0 in case of success, the set of unknown or unimplemented options
13112 * in case of error.
13113 */
13114int
13115xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13116{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013117 if (ctxt == NULL)
13118 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013119 if (options & XML_PARSE_RECOVER) {
13120 ctxt->recovery = 1;
13121 options -= XML_PARSE_RECOVER;
13122 } else
13123 ctxt->recovery = 0;
13124 if (options & XML_PARSE_DTDLOAD) {
13125 ctxt->loadsubset = XML_DETECT_IDS;
13126 options -= XML_PARSE_DTDLOAD;
13127 } else
13128 ctxt->loadsubset = 0;
13129 if (options & XML_PARSE_DTDATTR) {
13130 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13131 options -= XML_PARSE_DTDATTR;
13132 }
13133 if (options & XML_PARSE_NOENT) {
13134 ctxt->replaceEntities = 1;
13135 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13136 options -= XML_PARSE_NOENT;
13137 } else
13138 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013139 if (options & XML_PARSE_PEDANTIC) {
13140 ctxt->pedantic = 1;
13141 options -= XML_PARSE_PEDANTIC;
13142 } else
13143 ctxt->pedantic = 0;
13144 if (options & XML_PARSE_NOBLANKS) {
13145 ctxt->keepBlanks = 0;
13146 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13147 options -= XML_PARSE_NOBLANKS;
13148 } else
13149 ctxt->keepBlanks = 1;
13150 if (options & XML_PARSE_DTDVALID) {
13151 ctxt->validate = 1;
13152 if (options & XML_PARSE_NOWARNING)
13153 ctxt->vctxt.warning = NULL;
13154 if (options & XML_PARSE_NOERROR)
13155 ctxt->vctxt.error = NULL;
13156 options -= XML_PARSE_DTDVALID;
13157 } else
13158 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013159 if (options & XML_PARSE_NOWARNING) {
13160 ctxt->sax->warning = NULL;
13161 options -= XML_PARSE_NOWARNING;
13162 }
13163 if (options & XML_PARSE_NOERROR) {
13164 ctxt->sax->error = NULL;
13165 ctxt->sax->fatalError = NULL;
13166 options -= XML_PARSE_NOERROR;
13167 }
Daniel Veillard81273902003-09-30 00:43:48 +000013168#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013169 if (options & XML_PARSE_SAX1) {
13170 ctxt->sax->startElement = xmlSAX2StartElement;
13171 ctxt->sax->endElement = xmlSAX2EndElement;
13172 ctxt->sax->startElementNs = NULL;
13173 ctxt->sax->endElementNs = NULL;
13174 ctxt->sax->initialized = 1;
13175 options -= XML_PARSE_SAX1;
13176 }
Daniel Veillard81273902003-09-30 00:43:48 +000013177#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013178 if (options & XML_PARSE_NODICT) {
13179 ctxt->dictNames = 0;
13180 options -= XML_PARSE_NODICT;
13181 } else {
13182 ctxt->dictNames = 1;
13183 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013184 if (options & XML_PARSE_NOCDATA) {
13185 ctxt->sax->cdataBlock = NULL;
13186 options -= XML_PARSE_NOCDATA;
13187 }
13188 if (options & XML_PARSE_NSCLEAN) {
13189 ctxt->options |= XML_PARSE_NSCLEAN;
13190 options -= XML_PARSE_NSCLEAN;
13191 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013192 if (options & XML_PARSE_NONET) {
13193 ctxt->options |= XML_PARSE_NONET;
13194 options -= XML_PARSE_NONET;
13195 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013196 if (options & XML_PARSE_COMPACT) {
13197 ctxt->options |= XML_PARSE_COMPACT;
13198 options -= XML_PARSE_COMPACT;
13199 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013200 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013201 return (options);
13202}
13203
13204/**
13205 * xmlDoRead:
13206 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013207 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013208 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013209 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013210 * @reuse: keep the context for reuse
13211 *
13212 * Common front-end for the xmlRead functions
13213 *
13214 * Returns the resulting document tree or NULL
13215 */
13216static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013217xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13218 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013219{
13220 xmlDocPtr ret;
13221
13222 xmlCtxtUseOptions(ctxt, options);
13223 if (encoding != NULL) {
13224 xmlCharEncodingHandlerPtr hdlr;
13225
13226 hdlr = xmlFindCharEncodingHandler(encoding);
13227 if (hdlr != NULL)
13228 xmlSwitchToEncoding(ctxt, hdlr);
13229 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013230 if ((URL != NULL) && (ctxt->input != NULL) &&
13231 (ctxt->input->filename == NULL))
13232 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013233 xmlParseDocument(ctxt);
13234 if ((ctxt->wellFormed) || ctxt->recovery)
13235 ret = ctxt->myDoc;
13236 else {
13237 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013238 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013239 xmlFreeDoc(ctxt->myDoc);
13240 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013241 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013242 ctxt->myDoc = NULL;
13243 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013244 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013245 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246
13247 return (ret);
13248}
13249
13250/**
13251 * xmlReadDoc:
13252 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013253 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013254 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013255 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013256 *
13257 * parse an XML in-memory document and build a tree.
13258 *
13259 * Returns the resulting document tree
13260 */
13261xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013262xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013263{
13264 xmlParserCtxtPtr ctxt;
13265
13266 if (cur == NULL)
13267 return (NULL);
13268
13269 ctxt = xmlCreateDocParserCtxt(cur);
13270 if (ctxt == NULL)
13271 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013272 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013273}
13274
13275/**
13276 * xmlReadFile:
13277 * @filename: a file or URL
13278 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013279 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280 *
13281 * parse an XML file from the filesystem or the network.
13282 *
13283 * Returns the resulting document tree
13284 */
13285xmlDocPtr
13286xmlReadFile(const char *filename, const char *encoding, int options)
13287{
13288 xmlParserCtxtPtr ctxt;
13289
Daniel Veillard61b93382003-11-03 14:28:31 +000013290 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013291 if (ctxt == NULL)
13292 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013293 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013294}
13295
13296/**
13297 * xmlReadMemory:
13298 * @buffer: a pointer to a char array
13299 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013300 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013301 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013302 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013303 *
13304 * parse an XML in-memory document and build a tree.
13305 *
13306 * Returns the resulting document tree
13307 */
13308xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013309xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013310{
13311 xmlParserCtxtPtr ctxt;
13312
13313 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13314 if (ctxt == NULL)
13315 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013316 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013317}
13318
13319/**
13320 * xmlReadFd:
13321 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013322 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013323 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013324 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013325 *
13326 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013327 * NOTE that the file descriptor will not be closed when the
13328 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013329 *
13330 * Returns the resulting document tree
13331 */
13332xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013333xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013334{
13335 xmlParserCtxtPtr ctxt;
13336 xmlParserInputBufferPtr input;
13337 xmlParserInputPtr stream;
13338
13339 if (fd < 0)
13340 return (NULL);
13341
13342 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13343 if (input == NULL)
13344 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013345 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013346 ctxt = xmlNewParserCtxt();
13347 if (ctxt == NULL) {
13348 xmlFreeParserInputBuffer(input);
13349 return (NULL);
13350 }
13351 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13352 if (stream == NULL) {
13353 xmlFreeParserInputBuffer(input);
13354 xmlFreeParserCtxt(ctxt);
13355 return (NULL);
13356 }
13357 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013358 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013359}
13360
13361/**
13362 * xmlReadIO:
13363 * @ioread: an I/O read function
13364 * @ioclose: an I/O close function
13365 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013366 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013367 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013368 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013369 *
13370 * parse an XML document from I/O functions and source and build a tree.
13371 *
13372 * Returns the resulting document tree
13373 */
13374xmlDocPtr
13375xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013376 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013377{
13378 xmlParserCtxtPtr ctxt;
13379 xmlParserInputBufferPtr input;
13380 xmlParserInputPtr stream;
13381
13382 if (ioread == NULL)
13383 return (NULL);
13384
13385 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13386 XML_CHAR_ENCODING_NONE);
13387 if (input == NULL)
13388 return (NULL);
13389 ctxt = xmlNewParserCtxt();
13390 if (ctxt == NULL) {
13391 xmlFreeParserInputBuffer(input);
13392 return (NULL);
13393 }
13394 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13395 if (stream == NULL) {
13396 xmlFreeParserInputBuffer(input);
13397 xmlFreeParserCtxt(ctxt);
13398 return (NULL);
13399 }
13400 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013401 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013402}
13403
13404/**
13405 * xmlCtxtReadDoc:
13406 * @ctxt: an XML parser context
13407 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013408 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013409 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013410 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013411 *
13412 * parse an XML in-memory document and build a tree.
13413 * This reuses the existing @ctxt parser context
13414 *
13415 * Returns the resulting document tree
13416 */
13417xmlDocPtr
13418xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013419 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013420{
13421 xmlParserInputPtr stream;
13422
13423 if (cur == NULL)
13424 return (NULL);
13425 if (ctxt == NULL)
13426 return (NULL);
13427
13428 xmlCtxtReset(ctxt);
13429
13430 stream = xmlNewStringInputStream(ctxt, cur);
13431 if (stream == NULL) {
13432 return (NULL);
13433 }
13434 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013435 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013436}
13437
13438/**
13439 * xmlCtxtReadFile:
13440 * @ctxt: an XML parser context
13441 * @filename: a file or URL
13442 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013443 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013444 *
13445 * parse an XML file from the filesystem or the network.
13446 * This reuses the existing @ctxt parser context
13447 *
13448 * Returns the resulting document tree
13449 */
13450xmlDocPtr
13451xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13452 const char *encoding, int options)
13453{
13454 xmlParserInputPtr stream;
13455
13456 if (filename == NULL)
13457 return (NULL);
13458 if (ctxt == NULL)
13459 return (NULL);
13460
13461 xmlCtxtReset(ctxt);
13462
Daniel Veillard29614c72004-11-26 10:47:26 +000013463 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013464 if (stream == NULL) {
13465 return (NULL);
13466 }
13467 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013468 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013469}
13470
13471/**
13472 * xmlCtxtReadMemory:
13473 * @ctxt: an XML parser context
13474 * @buffer: a pointer to a char array
13475 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013476 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013477 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013478 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013479 *
13480 * parse an XML in-memory document and build a tree.
13481 * This reuses the existing @ctxt parser context
13482 *
13483 * Returns the resulting document tree
13484 */
13485xmlDocPtr
13486xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013487 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013488{
13489 xmlParserInputBufferPtr input;
13490 xmlParserInputPtr stream;
13491
13492 if (ctxt == NULL)
13493 return (NULL);
13494 if (buffer == NULL)
13495 return (NULL);
13496
13497 xmlCtxtReset(ctxt);
13498
13499 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13500 if (input == NULL) {
13501 return(NULL);
13502 }
13503
13504 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13505 if (stream == NULL) {
13506 xmlFreeParserInputBuffer(input);
13507 return(NULL);
13508 }
13509
13510 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013511 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013512}
13513
13514/**
13515 * xmlCtxtReadFd:
13516 * @ctxt: an XML parser context
13517 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013518 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013519 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013520 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013521 *
13522 * parse an XML from a file descriptor and build a tree.
13523 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013524 * NOTE that the file descriptor will not be closed when the
13525 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013526 *
13527 * Returns the resulting document tree
13528 */
13529xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013530xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13531 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013532{
13533 xmlParserInputBufferPtr input;
13534 xmlParserInputPtr stream;
13535
13536 if (fd < 0)
13537 return (NULL);
13538 if (ctxt == NULL)
13539 return (NULL);
13540
13541 xmlCtxtReset(ctxt);
13542
13543
13544 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13545 if (input == NULL)
13546 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013547 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013548 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13549 if (stream == NULL) {
13550 xmlFreeParserInputBuffer(input);
13551 return (NULL);
13552 }
13553 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013554 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013555}
13556
13557/**
13558 * xmlCtxtReadIO:
13559 * @ctxt: an XML parser context
13560 * @ioread: an I/O read function
13561 * @ioclose: an I/O close function
13562 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013563 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013564 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013565 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013566 *
13567 * parse an XML document from I/O functions and source and build a tree.
13568 * This reuses the existing @ctxt parser context
13569 *
13570 * Returns the resulting document tree
13571 */
13572xmlDocPtr
13573xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13574 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013575 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013576 const char *encoding, int options)
13577{
13578 xmlParserInputBufferPtr input;
13579 xmlParserInputPtr stream;
13580
13581 if (ioread == NULL)
13582 return (NULL);
13583 if (ctxt == NULL)
13584 return (NULL);
13585
13586 xmlCtxtReset(ctxt);
13587
13588 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13589 XML_CHAR_ENCODING_NONE);
13590 if (input == NULL)
13591 return (NULL);
13592 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13593 if (stream == NULL) {
13594 xmlFreeParserInputBuffer(input);
13595 return (NULL);
13596 }
13597 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013598 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013599}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013600
13601#define bottom_parser
13602#include "elfgcchack.h"